diff --git a/lib/puppet/external/pson/common.rb b/lib/puppet/external/pson/common.rb index c45b51417..980df6ece 100644 --- a/lib/puppet/external/pson/common.rb +++ b/lib/puppet/external/pson/common.rb @@ -1,376 +1,370 @@ require 'puppet/external/pson/version' module PSON class << self # If _object_ is string-like parse the string and return the parsed result # as a Ruby data structure. Otherwise generate a PSON text from the Ruby # data structure object and return it. # # The _opts_ argument is meant to be passed through to generate/parse respectively, see # generate and parse for their documentation. NOTE(review): both calls below pass `opts => {}` (a Hash keyed by _opts_) instead of _opts_ itself - confirm intent. def [](object, opts = {}) if object.respond_to? :to_str PSON.parse(object.to_str, opts => {}) else PSON.generate(object, opts => {}) end end # Returns the PSON parser class, that is used by PSON. This might be either # PSON::Ext::Parser or PSON::Pure::Parser. attr_reader :parser # Set the PSON parser class _parser_ to be used by PSON. def parser=(parser) # :nodoc: @parser = parser remove_const :Parser if const_defined? :Parser const_set :Parser, parser end # Return the constant located at _path_. # Anything may be registered as a path by calling register_path, above. # Otherwise, the format of _path_ has to be either ::A::B::C or A::B::C. # In either of these cases A has to be defined in Object (i.e. the path # must be an absolute namespace path). If the constant doesn't exist at # the given path, an ArgumentError is raised. def deep_const_get(path) # :nodoc: path = path.to_s path.split(/::/).inject(Object) do |p, c| case when c.empty? then p when p.const_defined?(c) then p.const_get(c) else raise ArgumentError, "can't find const for unregistered document type #{path}" end end end # Set the module _generator_ to be used by PSON.
def generator=(generator) # :nodoc: @generator = generator generator_methods = generator::GeneratorMethods for const in generator_methods.constants klass = deep_const_get(const) modul = generator_methods.const_get(const) klass.class_eval do instance_methods(false).each do |m| m.to_s == 'to_pson' and remove_method m end include modul end end self.state = generator::State const_set :State, self.state end # Returns the PSON generator module, that is used by PSON. This might be # either PSON::Ext::Generator or PSON::Pure::Generator. attr_reader :generator # Returns the PSON generator state class, that is used by PSON. This might # be either PSON::Ext::Generator::State or PSON::Pure::Generator::State. attr_accessor :state # This is the create identifier. PSON::Pure::Parser no longer uses it to call the # _pson_create_ hook of a class; String#to_pson_raw_object still emits it. It defaults to 'document_type'. attr_accessor :create_id end self.create_id = 'document_type' NaN = (-1.0) ** 0.5 Infinity = 1.0/0 MinusInfinity = -Infinity # The base exception for PSON errors. class PSONError < StandardError; end # This exception is raised, if a parser error occurs. class ParserError < PSONError; end # This exception is raised, if the nesting of parsed datastructures is too # deep. class NestingError < ParserError; end # This exception is raised, if a generator or unparser error occurs. class GeneratorError < PSONError; end # For backwards compatibility UnparserError = GeneratorError # If a circular data structure is encountered while unparsing # this exception is raised. class CircularDatastructure < GeneratorError; end # This exception is raised, if the required unicode support is missing on the # system. Usually this means, that the iconv library is not installed. class MissingUnicodeSupport < PSONError; end module_function # Parse the PSON string _source_ into a Ruby data structure and return it.
# # _opts_ can have the following # keys: # * *max_nesting*: The maximum depth of nesting allowed in the parsed data # structures. Disable depth checking with :max_nesting => false, it defaults # to 19. # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in # defiance of RFC 4627 to be parsed by the Parser. This option defaults # to false. - # * *create_additions*: If set to false, the Parser doesn't create - # additions even if a matching class and create_id was found. This option - # defaults to true. def parse(source, opts = {}) PSON.parser.new(source, opts).parse end # Parse the PSON string _source_ into a Ruby data structure and return it. # The bang version of the parse method, defaults to the more dangerous values # for the _opts_ hash, so be sure only to parse trusted _source_ strings. # # _opts_ can have the following keys: # * *max_nesting*: The maximum depth of nesting allowed in the parsed data # structures. Enable depth checking with :max_nesting => anInteger. The parse! # methods defaults to not doing max depth checking: This can be dangerous, # if someone wants to fill up your stack. # * *allow_nan*: If set to true, allow NaN, Infinity, and -Infinity in # defiance of RFC 4627 to be parsed by the Parser. This option defaults # to true. - # * *create_additions*: If set to false, the Parser doesn't create - # additions even if a matching class and create_id was found. This option - # defaults to true. def parse!(source, opts = {}) opts = { :max_nesting => false, :allow_nan => true }.update(opts) PSON.parser.new(source, opts).parse end # Unparse the Ruby data structure _obj_ into a single line PSON string and # return it. _state_ is # * a PSON::State object, # * or a Hash like object (responding to to_hash), # * an object convertible into a hash by a to_h method, # that is used as or to configure a State object. 
# # It defaults to a state object, that creates the shortest possible PSON text # in one line, checks for circular data structures and doesn't allow NaN, # Infinity, and -Infinity. # # A _state_ hash can have the following keys: # * *indent*: a string used to indent levels (default: ''), # * *space*: a string that is put after, a : or , delimiter (default: ''), # * *space_before*: a string that is put before a : pair delimiter (default: ''), # * *object_nl*: a string that is put at the end of a PSON object (default: ''), # * *array_nl*: a string that is put at the end of a PSON array (default: ''), # * *check_circular*: true if checking for circular data structures # should be done (the default), false otherwise. # * *allow_nan*: true if NaN, Infinity, and -Infinity should be # generated, otherwise an exception is thrown, if these values are # encountered. This options defaults to false. # * *max_nesting*: The maximum depth of nesting allowed in the data # structures from which PSON is to be generated. Disable depth checking # with :max_nesting => false, it defaults to 19. # # See also the fast_generate for the fastest creation method with the least # amount of sanity checks, and the pretty_generate method for some # defaults for a pretty output. def generate(obj, state = nil) if state state = State.from_state(state) else state = State.new end obj.to_pson(state) end # :stopdoc: # I want to deprecate these later, so I'll first be silent about them, and # later delete them. alias unparse generate module_function :unparse # :startdoc: # Unparse the Ruby data structure _obj_ into a single line PSON string and # return it. This method disables the checks for circles in Ruby objects, and # also generates NaN, Infinity, and, -Infinity float values. # # *WARNING*: Be careful not to pass any Ruby data structures with circles as # _obj_ argument, because this will cause PSON to go into an infinite loop. 
def fast_generate(obj) obj.to_pson(nil) end # :stopdoc: # I want to deprecate these later, so I'll first be silent about them, and later delete them. alias fast_unparse fast_generate module_function :fast_unparse # :startdoc: # Unparse the Ruby data structure _obj_ into a PSON string and return it. The # returned string is a prettier form of the string returned by #unparse. # # The _opts_ argument can be used to configure the generator, see the # generate method for a more detailed explanation. def pretty_generate(obj, opts = nil) state = PSON.state.new( :indent => ' ', :space => ' ', :object_nl => "\n", :array_nl => "\n", :check_circular => true ) if opts if opts.respond_to? :to_hash opts = opts.to_hash elsif opts.respond_to? :to_h opts = opts.to_h else raise TypeError, "can't convert #{opts.class} into Hash" end state.configure(opts) end obj.to_pson(state) end # :stopdoc: # I want to deprecate these later, so I'll first be silent about them, and later delete them. alias pretty_unparse pretty_generate module_function :pretty_unparse # :startdoc: # Load a ruby data structure from a PSON _source_ and return it. A source can # either be a string-like object, an IO like object, or an object responding # to the read method. If _proc_ was given, it will be called with any nested # Ruby object as an argument recursively in depth first order. # # This method is part of the implementation of the load/dump interface of # Marshal and YAML. def load(source, proc = nil) if source.respond_to? :to_str source = source.to_str elsif source.respond_to? 
:to_io source = source.to_io.read else source = source.read end result = parse(source, :max_nesting => false, :allow_nan => true) recurse_proc(result, &proc) if proc result end def recurse_proc(result, &proc) case result when Array result.each { |x| recurse_proc x, &proc } proc.call result when Hash result.each { |x, y| recurse_proc x, &proc; recurse_proc y, &proc } proc.call result else proc.call result end end private :recurse_proc module_function :recurse_proc alias restore load module_function :restore # Dumps _obj_ as a PSON string, i.e. calls generate on the object and returns # the result. # # If anIO (an IO like object or an object that responds to the write method) # was given, the resulting PSON is written to it. # # If the number of nested arrays or objects exceeds _limit_ an ArgumentError # exception is raised. This argument is similar (but not exactly the # same!) to the _limit_ argument in Marshal.dump. # # This method is part of the implementation of the load/dump interface of # Marshal and YAML. def dump(obj, anIO = nil, limit = nil) if anIO and limit.nil? anIO = anIO.to_io if anIO.respond_to?(:to_io) unless anIO.respond_to?(:write) limit = anIO anIO = nil end end limit ||= 0 result = generate(obj, :allow_nan => true, :max_nesting => limit) if anIO anIO.write result anIO else result end rescue PSON::NestingError raise ArgumentError, "exceed depth limit", $!.backtrace end # Provide a smarter wrapper for changing string encoding that works with # both Ruby 1.8 (iconv) and 1.9 (String#encode). Thankfully they seem to # have compatible input syntax, at least for the encodings we touch. if String.method_defined?("encode") def encode(to, from, string) string.encode(to, from) end else require 'iconv' def encode(to, from, string) Iconv.conv(to, from, string) end end end module ::Kernel private # Outputs _objs_ to STDOUT as PSON strings in the shortest form, that is in # one line. 
def j(*objs) objs.each do |obj| puts PSON::generate(obj, :allow_nan => true, :max_nesting => false) end nil end # Outputs _objs_ to STDOUT as PSON strings in a pretty format, with # indentation and over many lines. def jj(*objs) objs.each do |obj| puts PSON::pretty_generate(obj, :allow_nan => true, :max_nesting => false) end nil end # If _object_ is string-like parse the string and return the parsed result as # a Ruby data structure. Otherwise generate a PSON text from the Ruby data # structure object and return it. # # The _opts_ argument is passed through to generate/parse respectively, see # generate and parse for their documentation. def PSON(object, opts = {}) if object.respond_to? :to_str PSON.parse(object.to_str, opts) else PSON.generate(object, opts) end end end class ::Class # Returns true, if this class can be used to create an instance # from a serialised PSON string. The class has to implement a class # method _pson_create_ that expects a hash as first parameter, which includes # the required data. def pson_creatable?
respond_to?(:pson_create) end end diff --git a/lib/puppet/external/pson/pure/generator.rb b/lib/puppet/external/pson/pure/generator.rb index 17c98d58c..a4f80a6d2 100644 --- a/lib/puppet/external/pson/pure/generator.rb +++ b/lib/puppet/external/pson/pure/generator.rb @@ -1,394 +1,395 @@ module PSON MAP = { "\x0" => '\u0000', "\x1" => '\u0001', "\x2" => '\u0002', "\x3" => '\u0003', "\x4" => '\u0004', "\x5" => '\u0005', "\x6" => '\u0006', "\x7" => '\u0007', "\b" => '\b', "\t" => '\t', "\n" => '\n', "\xb" => '\u000b', "\f" => '\f', "\r" => '\r', "\xe" => '\u000e', "\xf" => '\u000f', "\x10" => '\u0010', "\x11" => '\u0011', "\x12" => '\u0012', "\x13" => '\u0013', "\x14" => '\u0014', "\x15" => '\u0015', "\x16" => '\u0016', "\x17" => '\u0017', "\x18" => '\u0018', "\x19" => '\u0019', "\x1a" => '\u001a', "\x1b" => '\u001b', "\x1c" => '\u001c', "\x1d" => '\u001d', "\x1e" => '\u001e', "\x1f" => '\u001f', '"' => '\"', '\\' => '\\\\', } # :nodoc: # Convert a UTF8 encoded Ruby string _string_ to a PSON string by escaping # double quotes, backslashes and the control characters \x00-\x1f (see MAP), and return it. if String.method_defined?(:force_encoding) def utf8_to_pson(string) # :nodoc: string = string.dup string << '' # XXX workaround: avoid buffer sharing string.force_encoding(Encoding::ASCII_8BIT) string.gsub!(/["\\\x0-\x1f]/) { MAP[$MATCH] } string rescue => e raise GeneratorError, "Caught #{e.class}: #{e}", e.backtrace end else def utf8_to_pson(string) # :nodoc: string.gsub(/["\\\x0-\x1f]/n) { MAP[$MATCH] } end end module_function :utf8_to_pson module Pure module Generator # This class is used to create State instances, that are used to hold data # while generating a PSON text from a Ruby data structure. class State # Creates a State object from _opts_, which ought to be Hash to create # a new State instance configured by _opts_, something else to create # an unconfigured instance. If _opts_ is a State object, it is just # returned.
def self.from_state(opts) case opts when self opts when Hash new(opts) else new end end # Instantiates a new State object, configured by _opts_. # # _opts_ can have the following keys: # # * *indent*: a string used to indent levels (default: ''), # * *space*: a string that is put after, a : or , delimiter (default: ''), # * *space_before*: a string that is put before a : pair delimiter (default: ''), # * *object_nl*: a string that is put at the end of a PSON object (default: ''), # * *array_nl*: a string that is put at the end of a PSON array (default: ''), # * *check_circular*: true if checking for circular data structures # should be done (the default), false otherwise. # * *max_nesting*: the maximum depth of nesting allowed in the generated # PSON; false or nil disables the check, it defaults to 19. # * *allow_nan*: true if NaN, Infinity, and -Infinity should be # generated, otherwise an exception is thrown, if these values are # encountered. This option defaults to false. def initialize(opts = {}) @seen = {} @indent = '' @space = '' @space_before = '' @object_nl = '' @array_nl = '' @check_circular = true @allow_nan = false configure opts end # This string is used to indent levels in the PSON text. attr_accessor :indent # This string is used to insert a space between the tokens in a PSON # string. attr_accessor :space # This string is used to insert a space before the ':' in PSON objects. attr_accessor :space_before # This string is put at the end of a line that holds a PSON object (or # Hash). attr_accessor :object_nl # This string is put at the end of a line that holds a PSON array. attr_accessor :array_nl # This integer returns the maximum level of data structure nesting in # the generated PSON, max_nesting = 0 if no maximum is checked. attr_accessor :max_nesting def check_max_nesting(depth) # :nodoc: return if @max_nesting.zero?
current_nesting = depth + 1 current_nesting > @max_nesting and raise NestingError, "nesting of #{current_nesting} is too deep" end # Returns true, if circular data structures should be checked, # otherwise returns false. def check_circular? @check_circular end # Returns true if NaN, Infinity, and -Infinity should be considered as # valid PSON and output. def allow_nan? @allow_nan end # Returns _true_, if _object_ was already seen during this generating # run. def seen?(object) @seen.key?(object.__id__) end # Remember _object_, to find out if it was already encountered (that is, whether a # cyclic data structure is being rendered). def remember(object) @seen[object.__id__] = true end # Forget _object_ for this generating run. def forget(object) @seen.delete object.__id__ end # Configure this State instance with the Hash _opts_, and return # itself. def configure(opts) @indent = opts[:indent] if opts.key?(:indent) @space = opts[:space] if opts.key?(:space) @space_before = opts[:space_before] if opts.key?(:space_before) @object_nl = opts[:object_nl] if opts.key?(:object_nl) @array_nl = opts[:array_nl] if opts.key?(:array_nl) @check_circular = !!opts[:check_circular] if opts.key?(:check_circular) @allow_nan = !!opts[:allow_nan] if opts.key?(:allow_nan) if !opts.key?(:max_nesting) # defaults to 19 @max_nesting = 19 elsif opts[:max_nesting] @max_nesting = opts[:max_nesting] else @max_nesting = 0 end self end # Returns the configuration instance variables as a hash, that can be # passed to the configure method. def to_h result = {} for iv in %w{indent space space_before object_nl array_nl check_circular allow_nan max_nesting} result[iv.intern] = instance_variable_get("@#{iv}") end result end end module GeneratorMethods module Object # Converts this object to a string (calling #to_s), converts # it to a PSON string, and returns the result. This is a fallback, if no # special method #to_pson was defined for some object.
def to_pson(*) to_s.to_pson end end module Hash # Returns a PSON string containing a PSON object, that is unparsed from # this Hash instance. # _state_ is a PSON::State object, that can also be used to configure the # produced PSON string output further. # _depth_ is used to find out nesting depth, to indent accordingly. def to_pson(state = nil, depth = 0, *) if state state = PSON.state.from_state(state) state.check_max_nesting(depth) pson_check_circular(state) { pson_transform(state, depth) } else pson_transform(state, depth) end end private def pson_check_circular(state) if state and state.check_circular? state.seen?(self) and raise PSON::CircularDatastructure, "circular data structures not supported!" state.remember self end yield ensure state and state.forget self end def pson_shift(state, depth) state and not state.object_nl.empty? or return '' state.indent * depth end def pson_transform(state, depth) delim = ',' if state delim << state.object_nl result = '{' result << state.object_nl result << map { |key,value| s = pson_shift(state, depth + 1) s << key.to_s.to_pson(state, depth + 1) s << state.space_before s << ':' s << state.space s << value.to_pson(state, depth + 1) }.join(delim) result << state.object_nl result << pson_shift(state, depth) result << '}' else result = '{' result << map { |key,value| key.to_s.to_pson << ':' << value.to_pson }.join(delim) result << '}' end result end end module Array # Returns a PSON string containing a PSON array, that is unparsed from # this Array instance. # _state_ is a PSON::State object, that can also be used to configure the # produced PSON string output further. # _depth_ is used to find out nesting depth, to indent accordingly. def to_pson(state = nil, depth = 0, *) if state state = PSON.state.from_state(state) state.check_max_nesting(depth) pson_check_circular(state) { pson_transform(state, depth) } else pson_transform(state, depth) end end private def pson_check_circular(state) if state and state.check_circular? 
state.seen?(self) and raise PSON::CircularDatastructure, "circular data structures not supported!" state.remember self end yield ensure state and state.forget self end def pson_shift(state, depth) state and not state.array_nl.empty? or return '' state.indent * depth end def pson_transform(state, depth) delim = ',' if state delim << state.array_nl result = '[' result << state.array_nl result << map { |value| pson_shift(state, depth + 1) << value.to_pson(state, depth + 1) }.join(delim) result << state.array_nl result << pson_shift(state, depth) result << ']' else '[' << map { |value| value.to_pson }.join(delim) << ']' end end end module Integer # Returns a PSON string representation for this Integer number. def to_pson(*) to_s end end module Float # Returns a PSON string representation for this Float number. def to_pson(state = nil, *) if infinite? || nan? if !state || state.allow_nan? to_s else raise GeneratorError, "#{self} not allowed in PSON" end else to_s end end end module String # This string should be encoded with UTF-8. A call to this method # returns the string wrapped in double quotes, with the escaping done by # PSON.utf8_to_pson. def to_pson(*) '"' << PSON.utf8_to_pson(self) << '"' end # Module that holds the extending methods if the String module is # included. module Extend # Raw Strings are PSON Objects (the raw bytes are stored in an array for the # key "raw"). The Ruby String can be created by this module method. def pson_create(o) o['raw'].pack('C*') end end # Extends _modul_ with the String::Extend module. def self.included(modul) modul.extend Extend end # This method creates a raw object hash, that can be nested into # other data structures and will be unparsed as a raw string. This # method should be used, if you want to convert raw strings to PSON # instead of UTF-8 strings, e.g. binary data.
def to_pson_raw_object + # create_id will be ignored during deserialization { PSON.create_id => self.class.name, 'raw' => self.unpack('C*'), } end # This method creates a PSON text from the result of # a call to to_pson_raw_object of this String. def to_pson_raw(*args) to_pson_raw_object.to_pson(*args) end end module TrueClass # Returns a PSON string for true: 'true'. def to_pson(*) 'true' end end module FalseClass # Returns a PSON string for false: 'false'. def to_pson(*) 'false' end end module NilClass # Returns a PSON string for nil: 'null'. def to_pson(*) 'null' end end end end end end diff --git a/lib/puppet/external/pson/pure/parser.rb b/lib/puppet/external/pson/pure/parser.rb index 43c6c5ffb..cf607afc0 100644 --- a/lib/puppet/external/pson/pure/parser.rb +++ b/lib/puppet/external/pson/pure/parser.rb @@ -1,318 +1,307 @@ require 'strscan' module PSON module Pure # This class implements the PSON parser that is used to parse a PSON string # into a Ruby data structure. class Parser < StringScanner STRING = /" ((?:[^\x0-\x1f"\\] | # escaped special characters: \\["\\\/bfnrt] | \\u[0-9a-fA-F]{4} | # match all but escaped special characters: \\[\x20-\x21\x23-\x2e\x30-\x5b\x5d-\x61\x63-\x65\x67-\x6d\x6f-\x71\x73\x75-\xff])*) "/nx INTEGER = /(-?0|-?[1-9]\d*)/ FLOAT = /(-? 
(?:0|[1-9]\d*) (?: \.\d+(?i:e[+-]?\d+) | \.\d+ | (?i:e[+-]?\d+) ) )/x NAN = /NaN/ INFINITY = /Infinity/ MINUS_INFINITY = /-Infinity/ OBJECT_OPEN = /\{/ OBJECT_CLOSE = /\}/ ARRAY_OPEN = /\[/ ARRAY_CLOSE = /\]/ PAIR_DELIMITER = /:/ COLLECTION_DELIMITER = /,/ TRUE = /true/ FALSE = /false/ NULL = /null/ IGNORE = %r( (?: //[^\n\r]*[\n\r]| # line comments /\* # c-style comments (?: [^*/]| # normal chars /[^*]| # slashes that do not start a nested comment \*[^/]| # asterisks that do not end this comment /(?=\*/) # single slash before this comment's end )* \*/ # the End of this comment |[ \t\r\n]+ # whitespaces: space, horicontal tab, lf, cr )+ )mx UNPARSED = Object.new # Creates a new PSON::Pure::Parser instance for the string _source_. # # It will be configured by the _opts_ hash. _opts_ can have the following # keys: # * *max_nesting*: The maximum depth of nesting allowed in the parsed data # structures. Disable depth checking with :max_nesting => false|nil|0, # it defaults to 19. # * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in # defiance of RFC 4627 to be parsed by the Parser. This option defaults # to false. - # * *create_additions*: If set to false, the Parser doesn't create - # additions even if a matching class and create_id was found. This option - # defaults to true. # * *object_class*: Defaults to Hash # * *array_class*: Defaults to Array def initialize(source, opts = {}) source = convert_encoding source super source if !opts.key?(:max_nesting) # defaults to 19 @max_nesting = 19 elsif opts[:max_nesting] @max_nesting = opts[:max_nesting] else @max_nesting = 0 end @allow_nan = !!opts[:allow_nan] - ca = true - ca = opts[:create_additions] if opts.key?(:create_additions) - @create_id = ca ? PSON.create_id : nil @object_class = opts[:object_class] || Hash @array_class = opts[:array_class] || Array end alias source string # Parses the current PSON string _source_ and returns the complete data # structure as a result. 
def parse reset obj = nil until eos? case when scan(OBJECT_OPEN) obj and raise ParserError, "source '#{peek(20)}' not in PSON!" @current_nesting = 1 obj = parse_object when scan(ARRAY_OPEN) obj and raise ParserError, "source '#{peek(20)}' not in PSON!" @current_nesting = 1 obj = parse_array when skip(IGNORE) ; else raise ParserError, "source '#{peek(20)}' not in PSON!" end end obj or raise ParserError, "source did not contain any PSON!" obj end private def convert_encoding(source) if source.respond_to?(:to_str) source = source.to_str else raise TypeError, "#{source.inspect} is not like a string" end if supports_encodings?(source) if source.encoding == ::Encoding::ASCII_8BIT b = source[0, 4].bytes.to_a source = case when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0 source.dup.force_encoding(::Encoding::UTF_32BE).encode!(::Encoding::UTF_8) when b.size >= 4 && b[0] == 0 && b[2] == 0 source.dup.force_encoding(::Encoding::UTF_16BE).encode!(::Encoding::UTF_8) when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0 source.dup.force_encoding(::Encoding::UTF_32LE).encode!(::Encoding::UTF_8) when b.size >= 4 && b[1] == 0 && b[3] == 0 source.dup.force_encoding(::Encoding::UTF_16LE).encode!(::Encoding::UTF_8) else source.dup end else source = source.encode(::Encoding::UTF_8) end source.force_encoding(::Encoding::ASCII_8BIT) else b = source source = case when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0 PSON.encode('utf-8', 'utf-32be', b) when b.size >= 4 && b[0] == 0 && b[2] == 0 PSON.encode('utf-8', 'utf-16be', b) when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0 PSON.encode('utf-8', 'utf-32le', b) when b.size >= 4 && b[1] == 0 && b[3] == 0 PSON.encode('utf-8', 'utf-16le', b) else b end end source end def supports_encodings?(string) # Some modules, such as REXML on 1.8.7 (see #22804) can actually create # a top-level Encoding constant when they are misused. 
Therefore # checking for just that constant is not enough, so we'll be a bit more # robust about if we can actually support encoding transformations. string.respond_to?(:encoding) && defined?(::Encoding) end # Unescape characters in strings. UNESCAPE_MAP = Hash.new { |h, k| h[k] = k.chr } UNESCAPE_MAP.update( { ?" => '"', ?\\ => '\\', ?/ => '/', ?b => "\b", ?f => "\f", ?n => "\n", ?r => "\r", ?t => "\t", ?u => nil, }) def parse_string if scan(STRING) return '' if self[1].empty? string = self[1].gsub(%r{(?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff])}n) do |c| if u = UNESCAPE_MAP[$MATCH[1]] u else # \uXXXX bytes = '' i = 0 while c[6 * i] == ?\\ && c[6 * i + 1] == ?u bytes << c[6 * i + 2, 2].to_i(16) << c[6 * i + 4, 2].to_i(16) i += 1 end PSON.encode('utf-8', 'utf-16be', bytes) end end string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding) string else UNPARSED end rescue => e raise GeneratorError, "Caught #{e.class}: #{e}", e.backtrace end def parse_value case when scan(FLOAT) Float(self[1]) when scan(INTEGER) Integer(self[1]) when scan(TRUE) true when scan(FALSE) false when scan(NULL) nil when (string = parse_string) != UNPARSED string when scan(ARRAY_OPEN) @current_nesting += 1 ary = parse_array @current_nesting -= 1 ary when scan(OBJECT_OPEN) @current_nesting += 1 obj = parse_object @current_nesting -= 1 obj when @allow_nan && scan(NAN) NaN when @allow_nan && scan(INFINITY) Infinity when @allow_nan && scan(MINUS_INFINITY) MinusInfinity else UNPARSED end end def parse_array raise NestingError, "nesting of #@current_nesting is too deep" if @max_nesting.nonzero? && @current_nesting > @max_nesting result = @array_class.new delim = false until eos? case when (value = parse_value) != UNPARSED delim = false result << value skip(IGNORE) if scan(COLLECTION_DELIMITER) delim = true elsif match?(ARRAY_CLOSE) ; else raise ParserError, "expected ',' or ']' in array at '#{peek(20)}'!" 
end when scan(ARRAY_CLOSE) raise ParserError, "expected next element in array at '#{peek(20)}'!" if delim break when skip(IGNORE) ; else raise ParserError, "unexpected token in array at '#{peek(20)}'!" end end result end def parse_object raise NestingError, "nesting of #@current_nesting is too deep" if @max_nesting.nonzero? && @current_nesting > @max_nesting result = @object_class.new delim = false until eos? case when (string = parse_string) != UNPARSED skip(IGNORE) raise ParserError, "expected ':' in object at '#{peek(20)}'!" unless scan(PAIR_DELIMITER) skip(IGNORE) unless (value = parse_value).equal? UNPARSED result[string] = value delim = false skip(IGNORE) if scan(COLLECTION_DELIMITER) delim = true elsif match?(OBJECT_CLOSE) ; else raise ParserError, "expected ',' or '}' in object at '#{peek(20)}'!" end else raise ParserError, "expected value in object at '#{peek(20)}'!" end when scan(OBJECT_CLOSE) raise ParserError, "expected next name, value pair in object at '#{peek(20)}'!" if delim - if @create_id and klassname = result[@create_id] - klass = PSON.deep_const_get klassname - break unless klass and klass.pson_creatable? - result = klass.pson_create(result) - end break when skip(IGNORE) ; else raise ParserError, "unexpected token in object at '#{peek(20)}'!" end end result end end end end diff --git a/spec/unit/external/pson_spec.rb b/spec/unit/external/pson_spec.rb index 94d6135fb..b7ba2f8a0 100755 --- a/spec/unit/external/pson_spec.rb +++ b/spec/unit/external/pson_spec.rb @@ -1,55 +1,62 @@ #! /usr/bin/env ruby # Encoding: UTF-8 require 'spec_helper' require 'puppet/external/pson/common' describe PSON do { 'foo' => '"foo"', 1 => '1', "\x80" => "\"\x80\"", [] => '[]' }.each do |str, expect| it "should be able to encode #{str.inspect}" do got = str.to_pson if got.respond_to? 
:force_encoding got.force_encoding('binary').should == expect.force_encoding('binary') else got.should == expect end end end it "should be able to handle arbitrary binary data" do bin_string = (1..20000).collect { |i| ((17*i+13*i*i) % 255).chr }.join parsed = PSON.parse(%Q{{ "type": "foo", "data": #{bin_string.to_pson} }})["data"] if parsed.respond_to? :force_encoding parsed.force_encoding('binary') bin_string.force_encoding('binary') end parsed.should == bin_string end it "should be able to handle UTF8 that isn't a real unicode character" do s = ["\355\274\267"] PSON.parse( [s].to_pson ).should == [s] end it "should be able to handle UTF8 for \\xFF" do s = ["\xc3\xbf"] PSON.parse( [s].to_pson ).should == [s] end it "should be able to handle invalid UTF8 bytes" do s = ["\xc3\xc3"] PSON.parse( [s].to_pson ).should == [s] end it "should be able to parse JSON containing UTF-8 characters in strings" do s = '{ "foö": "bár" }' lambda { PSON.parse s }.should_not raise_error end + + it 'ignores "document_type" during parsing' do + text = '{"data":{},"document_type":"Node"}' + + expect(PSON.parse(text)) + .to eq({"data" => {}, "document_type" => "Node"}) + end end