# encoding: UTF-8
#
# The above encoding line is a magic comment to set the default source encoding
# of this file for the Ruby interpreter.  It must be on the first or second
# line of the file if an interpreter is in use.  In Ruby 1.9 and later, the
# source encoding determines the encoding of String and Regexp objects created
# from this source file.  This explicit encoding is important because otherwise
# Ruby will pick an encoding based on LANG or LC_CTYPE environment variables.
# These may be different from site to site so it's important for us to
# establish a consistent behavior.  For more information on M17n please see:
# http://links.puppetlabs.com/understanding_m17n

# ZAML -- A partial replacement for YAML, written with speed and code clarity
#         in mind.  ZAML fixes one YAML bug (loading Exceptions) and provides
#         a replacement for YAML.dump unimaginatively called ZAML.dump,
#         which is faster on all known cases and an order of magnitude faster
#         with complex structures.
#
# http://github.com/hallettj/zaml
#
# Authors: Markus Roberts, Jesse Hallett, Ian McIntosh, Igal Koshevoy, Simon Chiang
#
# NOTE(review): this text was recovered from a whitespace-collapsed copy; the
# two-space indentation used inside the string literals of ZAML#nested and
# ZAML#new_label_for was reconstructed -- confirm against upstream.
#

require 'yaml'

# Streaming YAML emitter.  Objects serialize themselves by calling back into
# an instance of this class from their +to_zaml+ method.
class ZAML
  VERSION = "0.1.1"

  #
  # Class Methods
  #

  # Serializes +stuff+ by calling its +to_zaml+ and appends the resulting text
  # to +where+ (any object supporting <<).  Returns the result of that append.
  def self.dump(stuff, where='')
    z = new
    stuff.to_zaml(z)
    where << z.to_s
  end

  #
  # Instance Methods
  #

  # Starts an empty document; every document begins with the '--- ' marker.
  def initialize
    @result = []
    @indent = nil
    @structured_key_prefix = nil
    @previously_emitted_object = {}
    @next_free_label_number = 0
    emit('--- ')
  end

  # Runs the block with the indentation deepened by +tail+.  The previous
  # indentation is restored even if the block raises, so the emitter is not
  # left at the wrong depth.
  def nested(tail='  ')
    old_indent = @indent
    @indent = "#{@indent || "\n"}#{tail}"
    yield
  ensure
    @indent = old_indent
  end

  class Label
    #
    # YAML only wants objects in the datastream once; if the same object
    #    occurs more than once, we need to emit a label ("&idxxx") on the
    #    first occurrence and then emit a back reference ("*idxxx") on any
    #    subsequent occurrence(s).
    #
    # To accomplish this we keep a hash (by object id) of the labels of
    #    the things we serialize as we begin to serialize them.  The labels
    #    initially serialize as an empty string (since most objects are only
    #    going to be encountered once), but can be changed to a valid label
    #    (by assigning it a number) the first time it is subsequently used,
    #    if it ever is.  Note that we need to do the label setup BEFORE we
    #    start to serialize the object so that circular structures (in
    #    which we will encounter a reference to the object as we serialize
    #    it) can be handled.
    #
    attr_accessor :this_label_number

    def initialize(obj,indent)
      @indent = indent
      @this_label_number = nil
      @obj = obj # prevent garbage collection so that object id isn't reused
    end

    # Empty until a number has been assigned, then the anchor plus the
    # separator captured when the label was created.
    def to_s
      @this_label_number ? ('&id%03d%s' % [@this_label_number, @indent]) : ''
    end

    # Back reference text; only meaningful once a number has been assigned.
    def reference
      @reference ||= '*id%03d' % @this_label_number
    end
  end

  # Returns the Label recorded for +obj+, or nil if it has not been seen.
  def label_for(obj)
    @previously_emitted_object[obj.object_id]
  end

  # Records and returns a fresh Label for +obj+.  Hashes and Arrays get a
  # line break plus indentation after their anchor; everything else a space.
  def new_label_for(obj)
    label = Label.new(obj,(Hash === obj || Array === obj) ? "#{@indent || "\n"}  " : ' ')
    @previously_emitted_object[obj.object_id] = label
    label
  end

  # Yields to serialize +obj+ the first time it is seen; on later encounters
  # numbers its label (if needed) and emits a back reference instead.
  def first_time_only(obj)
    if label = label_for(obj)
      label.this_label_number ||= (@next_free_label_number += 1)
      emit(label.reference)
    else
      # A pending structured-key prefix is only written for non-String keys.
      if @structured_key_prefix and not obj.is_a? String
        emit(@structured_key_prefix)
        @structured_key_prefix = nil
      end
      emit(new_label_for(obj))
      yield
    end
  end

  # Appends +s+ to the output.  Labels do not count as content, so they leave
  # the "just emitted a newline" flag untouched.
  def emit(s)
    @result << s
    @recent_nl = false unless s.kind_of?(Label)
  end

  # Starts a new line at the current indentation (unless one was just
  # started) and then emits +s+.
  def nl(s='')
    emit(@indent || "\n") unless @recent_nl
    emit(s)
    @recent_nl = true
  end

  # The document emitted so far.
  def to_s
    @result.join
  end

  # Serializes a hash key inside the block.  If the key consumed the prefix
  # +x+ (see first_time_only) a new line is started before the value.
  def prefix_structured_keys(x)
    @structured_key_prefix = x
    yield
    nl unless @structured_key_prefix
    @structured_key_prefix = nil
  end
end

################################################################
#
#   Behavior for custom classes
#
################################################################

class Object
  def to_yaml_properties
    instance_variables.sort        # Default YAML behavior
  end

  # Hook for subclasses to transform a property value before it is emitted.
  def yaml_property_munge(x)
    x
  end

  # Builds the "!ruby/<root>[:<class>]" tag for this object.
  def zamlized_class_name(root)
    cls = self.class
    "!ruby/#{root.name.downcase}#{cls == root ? '' : ":#{cls.respond_to?(:name) ? cls.name : cls}"}"
  end

  # Emits the class tag followed by one "name: value" line per property
  # returned by to_yaml_properties, or " {}" when there are none.
  def to_zaml(z)
    z.first_time_only(self) {
      z.emit(zamlized_class_name(Object))
      z.nested {
        properties = to_yaml_properties
        if properties.empty?
          z.emit(" {}")
        else
          properties.each { |v|
            z.nl
            v.to_s[1..-1].to_zaml(z)       # Remove leading '@'
            z.emit(': ')
            yaml_property_munge(instance_variable_get(v)).to_zaml(z)
          }
        end
      }
    }
  end
end

################################################################
#
#   Behavior for built-in classes
#
################################################################

class NilClass
  def to_zaml(z)
    z.emit('')        # NOTE: blank turns into nil in YAML.load
  end
end

class Symbol
  def to_zaml(z)
    z.emit(self.inspect)
  end
end

class TrueClass
  def to_zaml(z)
    z.emit('true')
  end
end

class FalseClass
  def to_zaml(z)
    z.emit('false')
  end
end

class Numeric
  def to_zaml(z)
    z.emit(self)
  end
end

class Regexp
  def to_zaml(z)
    z.first_time_only(self) { z.emit("#{zamlized_class_name(Regexp)} #{inspect}") }
  end
end

class Exception
  def to_zaml(z)
    z.emit(zamlized_class_name(Exception))
    z.nested {
      z.nl("message: ")
      message.to_zaml(z)
    }
  end
  #
  # Monkey patch for buggy Exception restore in YAML
  #
  #     This makes it work for now but is not very future-proof; if things
  #     change we'll most likely want to remove this.  To mitigate the risks
  #     as much as possible, we test for the bug before applying the patch.
  #
  if respond_to? :yaml_new and yaml_new(self, :tag, "message" => "blurp").message != "blurp"
    def self.yaml_new( klass, tag, val )
      o = YAML.object_maker( klass, {} ).exception(val.delete( 'message'))
      val.each_pair do |k,v|
        o.instance_variable_set("@#{k}", v)
      end
      o
    end
  end
end

class String
  # Escape text for each control byte 0x00-0x1F, indexed by byte value.
  ZAML_ESCAPES = %w{\x00 \x01 \x02 \x03 \x04 \x05 \x06 \a \x08 \t \n \v \f \r \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \e \x1c \x1d \x1e \x1f }

  # Returns a binary-encoded copy with backslashes, double quotes and the
  # control bytes 0x00-0x1F escaped, suitable for the inside of a
  # double-quoted YAML scalar.  Bytes >= 0x20 (including the bytes of
  # multi-byte characters) pass through unchanged.
  def escaped_for_zaml
    # JJM (Note the trailing dots to construct a multi-line method chain.)
    self.to_ascii8bit.gsub( /\x5C/n, "\\\\\\" ).  # Demi-kludge for Maglev/rubinius; the regexp should be /\\/ but parsetree chokes on that.
    gsub( /"/n, "\\\"" ).
    gsub( /([\x00-\x1F])/n ) { |x| ZAML_ESCAPES[ x.unpack("C")[0] ] }
  end

  # Emits the string as a plain, double-quoted, block-literal or !binary
  # scalar, quoting whenever a plain scalar could be read back as something
  # other than this exact string.
  def to_zaml(z)
    z.first_time_only(self) {
      hex_num = '0x[a-f\d]+'
      # Decimal integers and floats, including underscore digit grouping
      # ("1_000"), a leading dot (".5") and an exponent ("1.0e+3"); a YAML 1.1
      # loader resolves all of these to numbers when they are left unquoted.
      decimal = '(?:\d[\d_]*\.?[\d_]*|\.\d[\d_]*)(?:e[-+]?\d+)?'
      # ".inf" / ".nan" are the YAML spellings of the special float values.
      special = '\.(?:inf|nan)'
      num = "[-+]?(?:#{decimal}|#{hex_num}|#{special})"
      case
        when self == ''
          z.emit('""')
        when self.to_ascii8bit !~ /\A(?: # ?: non-capturing group (grouping with no back references)
                 [\x09\x0A\x0D\x20-\x7E]            # ASCII
               | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
               | \xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
               | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
               | \xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
               | \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
               | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
               | \xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
               )*\z/mnx
          # Not valid UTF-8 (or contains other control bytes): ship as base64.
          z.emit("!binary ")
          z.emit([self].pack("m*"))
        when (
          (self =~ /\A(true|false|yes|no|on|null|off|#{num}(:#{num})*|!|=|~)$/i) or
          (self =~ /\A\n* /) or
          (self =~ /[\s:]$/) or
          (self =~ /^[>|][-+\d]*\s/i) or
          (self[-1..-1] =~ /\s/) or
          (self =~ /[,\[\]\{\}\r\t]|:\s|\s#/) or
          (self =~ /\A([-:?!#&*'"]|<<|%.+:.)/)
          )
          z.emit("\"#{escaped_for_zaml}\"")
        when self =~ /\n/
          if self[-1..-1] == "\n" then z.emit('|+') else z.emit('|-') end
          z.nested { split("\n",-1).each { |line| z.nl; z.emit(line.chomp("\n")) } }
        else
          z.emit(self)
      end
    }
  end

  # Return a guaranteed binary (ASCII-8BIT) encoded string for Ruby 1.9 and
  # later.  This is a helper for other methods that perform regular
  # expressions against byte sequences deliberately rather than dealing with
  # characters.  The method may or may not return a new instance.
  def to_ascii8bit
    # Compare against the Encoding constant rather than its name; the
    # reported name of the binary encoding is not the same on every Ruby.
    if self.respond_to?(:encoding) and self.encoding != Encoding::BINARY then
      str = self.dup
      str.force_encoding(Encoding::BINARY)
      return str
    else
      return self
    end
  end
end

class Hash
  def to_zaml(z)
    z.first_time_only(self) {
      z.nested {
        if empty?
          z.emit('{}')
        else
          each_pair { |k, v|
            z.nl
            z.prefix_structured_keys('? ') { k.to_zaml(z) }
            z.emit(': ')
            v.to_zaml(z)
          }
        end
      }
    }
  end
end

class Array
  def to_zaml(z)
    z.first_time_only(self) {
      z.nested {
        if empty?
          z.emit('[]')
        else
          each { |v| z.nl('- '); v.to_zaml(z) }
        end
      }
    }
  end
end

class Time
  def to_zaml(z)
    # 2008-12-06 10:06:51.373758 -07:00
    ms = '%06d' % usec
    # Split the magnitude of the offset and add the sign afterwards; integer
    # division rounds toward negative infinity, so dividing a negative
    # offset such as -03:30 directly would yield "-04:30".
    sign = utc_offset < 0 ? '-' : '+'
    hours, minutes = (utc_offset.abs / 60).divmod(60)
    offset = '%s%02d:%02d' % [sign, hours, minutes]
    z.emit(self.strftime("%Y-%m-%d %H:%M:%S.#{ms} #{offset}"))
  end
end

class Date
  def to_zaml(z)
    z.emit(strftime('%Y-%m-%d'))
  end
end

class Range
  def to_zaml(z)
    z.first_time_only(self) {
      z.emit(zamlized_class_name(Range))
      z.nested {
        z.nl
        z.emit('begin: ')
        z.emit(first)
        z.nl
        z.emit('end: ')
        z.emit(last)
        z.nl
        z.emit('excl: ')
        z.emit(exclude_end?)
      }
    }
  end
end
# Base constant for locating fixtures, defined once so that individual spec
# files do not have to compute it themselves.
module PuppetSpec
  unless defined?(FIXTURE_DIR)
    FIXTURE_DIR = File.join(dir = File.expand_path(File.dirname(__FILE__)), "fixtures")
  end
end

require 'pathname'
require 'tmpdir'

require 'puppet_spec/verbose'
require 'puppet_spec/files'
require 'puppet_spec/settings'
require 'puppet_spec/fixtures'
require 'puppet_spec/matchers'
require 'puppet_spec/database'
require 'monkey_patches/alias_should_to_must'
require 'monkey_patches/publicize_methods'
require 'puppet/test/test_helper'

# Load every shared context, then every shared behaviour, using paths
# relative to the spec directory.
spec_root = Pathname.new(dir)
%w{shared_contexts/*.rb shared_behaviours/**/*.rb}.each do |pattern|
  Pathname.glob("#{dir}/#{pattern}") do |support_file|
    require support_file.relative_path_from(spec_root)
  end
end

RSpec.configure do |config|
  include PuppetSpec::Fixtures

  config.mock_with :mocha

  config.before(:all) { Puppet::Test::TestHelper.before_all_tests }

  config.after(:all) { Puppet::Test::TestHelper.after_all_tests }

  config.before(:each) do
    # Garbage collection is switched off for the duration of each example and
    # back on afterwards; this was measured as a ~15 percent speedup, more on
    # slower platforms such as Windows.
    GC.disable

    # REVISIT: this probably conceals other bad tests, which have not been
    # fully diagnosed yet.  --daniel 2011-04-21
    Signal.stubs(:trap)

    # TODO: in a saner world this logging redirection would live in the
    # TestHelper class; as it stands, external projects have to roll their
    # own way of redirecting logging and validating expected log messages.
    # It cannot be moved easily because it relies on an instance variable
    # "@logs" on EVERY SINGLE TEST CLASS, and over 1300 tests expect that
    # instance variable to be available.
    #
    # Logging is redirected away from the console so that log output does not
    # obscure the test output.
    @logs = []
    Puppet::Util::Log.newdestination(Puppet::Test::LogCollector.new(@logs))

    @log_level = Puppet::Util::Log.level

    Puppet::Test::TestHelper.before_each_test
  end

  config.after(:each) do
    Puppet::Test::TestHelper.after_each_test

    # TODO: would like to move this into puppetlabs_spec_helper, but there
    # are namespace issues at the moment.
    PuppetSpec::Files.cleanup

    # TODO: this should be abstracted in the future -- see the comments above
    # the '@logs' setup in the "before" hook.
    #
    # Undo the logging changes made before the example ran.
    @logs.clear
    Puppet::Util::Log.close_all
    Puppet::Util::Log.level = @log_level

    # This allows a GC between tests, but only if actually required.  Forcing
    # a GC run was tried and was less efficient than just letting it run all
    # the time.
    GC.enable
  end
end
# These may be different from site to site so it's important for us to
# establish a consistent behavior.  For more information on M17n please see:
# http://links.puppetlabs.com/understanding_m17n

require 'spec_helper'

require 'puppet/util/monkey_patches'

# NOTE(review): the runs of spaces inside the expected YAML strings below were
# reconstructed from a whitespace-collapsed copy (two spaces per nesting
# level) -- confirm against upstream.
describe "Pure ruby yaml implementation" do
  # Values that compare with ==, so the round trip can be checked exactly.
  comparable_cases = {
    7              => "--- 7",
    3.14159        => "--- 3.14159",
    "3.14159"      => '--- "3.14159"',
    "+3.14159"     => '--- "+3.14159"',
    "0x123abc"     => '--- "0x123abc"',
    "-0x123abc"    => '--- "-0x123abc"',
    "-0x123"       => '--- "-0x123"',
    "+0x123"       => '--- "+0x123"',
    "0x123.456"    => "--- 0x123.456",
    'test'         => "--- test",
    []             => "--- []",
    :symbol        => "--- !ruby/sym symbol",
    {:a => "A"}    => "--- \n  !ruby/sym a: A",
    {:a => "x\ny"} => "--- \n  !ruby/sym a: |-\n    x\n    y"
  }

  comparable_cases.each do |object, expected|
    it "should convert the #{object.class} #{object.inspect} to yaml" do
      object.to_yaml.should == expected
    end

    it "should produce yaml for the #{object.class} #{object.inspect} that can be reconstituted" do
      YAML.load(object.to_yaml).should == object
    end
  end

  #
  # Can't test for equality on raw objects
  opaque_cases = {
    Object.new                 => "--- !ruby/object {}",
    [Object.new]               => "--- \n  - !ruby/object {}",
    {Object.new => Object.new} => "--- \n  ? !ruby/object {}\n  : !ruby/object {}"
  }

  opaque_cases.each do |object, expected|
    it "should convert the #{object.class} #{object.inspect} to yaml" do
      object.to_yaml.should == expected
    end

    it "should produce yaml for the #{object.class} #{object.inspect} that can be reconstituted" do
      expect { YAML.load(object.to_yaml) }.not_to raise_error
    end
  end

  it "should emit proper labels and backreferences for common objects" do
    # Note: this test makes assumptions about the names ZAML chooses
    # for labels.
    first = [1, 2]
    second = [3, 4]
    combined = [first, second, first, second]
    combined.to_yaml.should == "--- \n  - &id001\n    - 1\n    - 2\n  - &id002\n    - 3\n    - 4\n  - *id001\n  - *id002"

    loaded = YAML.load(combined.to_yaml)
    loaded.should == combined
    loaded[0].should equal(loaded[2])
    loaded[1].should equal(loaded[3])
  end

  it "should emit proper labels and backreferences for recursive objects" do
    cyclic = [1, 2]
    cyclic << cyclic
    cyclic.to_yaml.should == "--- &id001\n  \n  - 1\n  - 2\n  - *id001"

    loaded = YAML.load(cyclic.to_yaml)
    loaded.should be_a(Array)
    loaded.length.should == 3
    loaded[0].should == 1
    loaded[1].should == 2
    loaded[2].should equal(loaded)
  end
end

# Note, many of these tests will pass on Ruby 1.8 but fail on 1.9 if the patch
# fix is not applied to Puppet or there's a regression.  These version
# dependent failures are intentional since the string encoding behavior changed
# significantly in 1.9.
describe "UTF-8 encoded String#to_yaml (Bug #11246)" do
  # JJM All of these snowmen are different representations of the same
  # UTF-8 encoded string.
  let(:snowman)         { 'Snowman: [☃]' }
  let(:snowman_escaped) { "Snowman: [\xE2\x98\x83]" }

  describe "UTF-8 String Literal" do
    subject { snowman }

    it "should serialize to YAML" do
      subject.to_yaml
    end

    it "should serialize and deserialize to the same thing" do
      YAML.load(subject.to_yaml).should == subject
    end

    it "should serialize and deserialize to a String compatible with a UTF-8 encoded Regexp" do
      YAML.load(subject.to_yaml).should =~ /☃/u
    end
  end
end

describe "binary data" do
  subject { "M\xC0\xDF\xE5tt\xF6" }

  it "should not explode encoding binary data" do
    expect { subject.to_yaml }.not_to raise_error
  end

  it "should mark the binary data as binary" do
    subject.to_yaml.should =~ /!binary/
  end

  it "should round-trip the data" do
    serialized = subject.to_yaml
    restored = YAML.load(serialized)

    # On Rubies with string encodings, compare both sides as raw bytes.
    if restored.respond_to? :force_encoding
      restored.force_encoding('binary')
      subject.force_encoding('binary')
    end

    restored.should == subject
  end
end