diff --git a/lib/puppet/pops/issues.rb b/lib/puppet/pops/issues.rb index c1bf23abc..4c496cda4 100644 --- a/lib/puppet/pops/issues.rb +++ b/lib/puppet/pops/issues.rb @@ -1,565 +1,648 @@ # Defines classes to deal with issues, and message formatting and defines constants with Issues. # @api public # module Puppet::Pops::Issues # Describes an issue, and can produce a message for an occurrence of the issue. # class Issue # The issue code # @return [Symbol] attr_reader :issue_code # A block producing the message # @return [Proc] attr_reader :message_block # Names that must be bound in an occurrence of the issue to be able to produce a message. # These are the names in addition to requirements stipulated by the Issue formatter contract; i.e. :label`, # and `:semantic`. # attr_reader :arg_names # If this issue can have its severity lowered to :warning, :deprecation, or :ignored attr_writer :demotable # Configures the Issue with required arguments (bound by occurrence), and a block producing a message. def initialize issue_code, *args, &block @issue_code = issue_code @message_block = block @arg_names = args @demotable = true end # Returns true if it is allowed to demote this issue def demotable? @demotable end # Formats a message for an occurrence of the issue with argument bindings passed in a hash. # The hash must contain a LabelProvider bound to the key `label` and the semantic model element # bound to the key `semantic`. All required arguments as specified by `arg_names` must be bound # in the given `hash`. # @api public # def format(hash ={}) # Create a Message Data where all hash keys become methods for convenient interpolation # in issue text. msgdata = MessageData.new(*arg_names) begin # Evaluate the message block in the msg data's binding msgdata.format(hash, &message_block) rescue StandardError => e Puppet::Pops::Issues::MessageData raise RuntimeError, "Error while reporting issue: #{issue_code}. #{e.message}", caller end end end # Provides a binding of arguments passed to Issue.format to method names available # in the issue's message producing block. # @api private # class MessageData def initialize *argnames singleton = class << self; self end argnames.each do |name| singleton.send(:define_method, name) do @data[name] end end end def format(hash, &block) @data = hash instance_eval &block end # Returns the label provider given as a key in the hash passed to #format. # If given an argument, calls #label on the label provider (caller would otherwise have to # call label.label(it) # def label(it = nil) raise "Label provider key :label must be set to produce the text of the message!" unless @data[:label] it.nil? ? @data[:label] : @data[:label].label(it) end # Returns the label provider given as a key in the hash passed to #format. # def semantic raise "Label provider key :semantic must be set to produce the text of the message!" unless @data[:semantic] @data[:semantic] end end # Defines an issue with the given `issue_code`, additional required parameters, and a block producing a message. # The block is evaluated in the context of a MessageData which provides convenient access to all required arguments # via accessor methods. In addition to accessors for specified arguments, these are also available: # * `label` - a `LabelProvider` that provides human understandable names for model elements and production of article (a/an/the). 
# * `semantic` - the model element for which the issue is reported # # @param issue_code [Symbol] the issue code for the issue used as an identifier, should be the same as the constant # the issue is bound to. # @param args [Symbol] required arguments that must be passed when formatting the message, may be empty # @param block [Proc] a block producing the message string, evaluated in a MessageData scope. The produced string # should not end with a period as additional information may be appended. # # @see MessageData # @api public # def self.issue (issue_code, *args, &block) Issue.new(issue_code, *args, &block) end # Creates a non demotable issue. # @see Issue.issue # def self.hard_issue(issue_code, *args, &block) result = Issue.new(issue_code, *args, &block) result.demotable = false result end # @comment Here follows definitions of issues. The intent is to provide a list from which yardoc can be generated # containing more detailed information / explanation of the issue. # These issues are set as constants, but it is unfortunately not possible for the created object to easily know which # name it is bound to. Instead the constant has to be repeated. (Alternatively, it could be done by instead calling # #const_set on the module, but the extra work required to get yardoc output vs. the extra effort to repeat the name # twice makes it not worth it (if doable at all, since there is no tag to artificially construct a constant, and # the parse tag does not produce any result for a constant assignment). # This is allowed (3.1) and has not yet been deprecated. # @todo configuration # NAME_WITH_HYPHEN = issue :NAME_WITH_HYPHEN, :name do "#{label.a_an_uc(semantic)} may not have a name containing a hyphen. The name '#{name}' is not legal" end # When a variable name contains a hyphen and these are illegal. # It is possible to control if a hyphen is legal in a name or not using the setting TODO # @todo describe the setting # @api public # @todo configuration if this is error or warning # VAR_WITH_HYPHEN = issue :VAR_WITH_HYPHEN, :name do "A variable name may not contain a hyphen. The name '#{name}' is not legal" end # A class, definition, or node may only appear at top level or inside other classes # @todo Is this really true for nodes? Can they be inside classes? Isn't that too late? # @api public # NOT_TOP_LEVEL = hard_issue :NOT_TOP_LEVEL do "Classes, definitions, and nodes may only appear at toplevel or inside other classes" end CROSS_SCOPE_ASSIGNMENT = hard_issue :CROSS_SCOPE_ASSIGNMENT, :name do "Illegal attempt to assign to '#{name}'. Cannot assign to variables in other namespaces" end # Assignment can only be made to certain types of left hand expressions such as variables. ILLEGAL_ASSIGNMENT = hard_issue :ILLEGAL_ASSIGNMENT do "Illegal attempt to assign to '#{label.a_an(semantic)}'. Not an assignable reference" end # Variables are immutable, cannot reassign in the same assignment scope ILLEGAL_REASSIGNMENT = hard_issue :ILLEGAL_REASSIGNMENT, :name do "Cannot reassign variable #{name}" end ILLEGAL_RESERVED_ASSIGNMENT = hard_issue :ILLEGAL_RESERVED_ASSIGNMENT, :name do "Attempt to assign to a reserved variable name: '#{name}'" end # Assignment cannot be made to numeric match result variables ILLEGAL_NUMERIC_ASSIGNMENT = issue :ILLEGAL_NUMERIC_ASSIGNMENT, :varname do "Illegal attempt to assign to the numeric match result variable '$#{varname}'. 
Numeric variables are not assignable" end # parameters cannot have numeric names, clashes with match result variables ILLEGAL_NUMERIC_PARAMETER = issue :ILLEGAL_NUMERIC_PARAMETER, :name do "The numeric parameter name '$#{name}' cannot be used (clashes with numeric match result variables)" end # In certain versions of Puppet it may be allowed to assign to a not already assigned key # in an array or a hash. This is an optional validation that may be turned on to prevent accidental # mutation. # ILLEGAL_INDEXED_ASSIGNMENT = issue :ILLEGAL_INDEXED_ASSIGNMENT do "Illegal attempt to assign via [index/key]. Not an assignable reference" end # When indexed assignment ($x[]=) is allowed, the leftmost expression must be # a variable expression. # ILLEGAL_ASSIGNMENT_VIA_INDEX = hard_issue :ILLEGAL_ASSIGNMENT_VIA_INDEX do "Illegal attempt to assign to #{label.a_an(semantic)} via [index/key]. Not an assignable reference" end APPENDS_DELETES_NO_LONGER_SUPPORTED = hard_issue :APPENDS_DELETES_NO_LONGER_SUPPORTED, :operator do "The operator '#{operator}' is no longer supported. See http://links.puppetlabs.com/remove-plus-equals" end # For unsupported operators (e.g. += and -= in puppet 4). # UNSUPPORTED_OPERATOR = hard_issue :UNSUPPORTED_OPERATOR, :operator do "The operator '#{operator}' is not supported." end # For operators that are not supported in specific contexts (e.g. '* =>' in # resource defaults) # UNSUPPORTED_OPERATOR_IN_CONTEXT = hard_issue :UNSUPPORTED_OPERATOR_IN_CONTEXT, :operator do "The operator '#{operator}' in #{label.a_an(semantic)} is not supported." end # For non applicable operators (e.g. << on Hash). # OPERATOR_NOT_APPLICABLE = hard_issue :OPERATOR_NOT_APPLICABLE, :operator, :left_value do "Operator '#{operator}' is not applicable to #{label.a_an(left_value)}." end COMPARISON_NOT_POSSIBLE = hard_issue :COMPARISON_NOT_POSSIBLE, :operator, :left_value, :right_value, :detail do "Comparison of: #{label(left_value)} #{operator} #{label(right_value)}, is not possible. Caused by '#{detail}'." end MATCH_NOT_REGEXP = hard_issue :MATCH_NOT_REGEXP, :detail do "Can not convert right match operand to a regular expression. Caused by '#{detail}'." end MATCH_NOT_STRING = hard_issue :MATCH_NOT_STRING, :left_value do "Left match operand must result in a String value. Got #{label.a_an(left_value)}." end # Some expressions/statements may not produce a value (known as right-value, or rvalue). # This may vary between puppet versions. # NOT_RVALUE = issue :NOT_RVALUE do "Invalid use of expression. #{label.a_an_uc(semantic)} does not produce a value" end # Appending to attributes is only allowed in certain types of resource expressions. # ILLEGAL_ATTRIBUTE_APPEND = hard_issue :ILLEGAL_ATTRIBUTE_APPEND, :name, :parent do "Illegal +> operation on attribute #{name}. This operator can not be used in #{label.a_an(parent)}" end ILLEGAL_NAME = hard_issue :ILLEGAL_NAME, :name do "Illegal name. The given name #{name} does not conform to the naming rule /^((::)?[a-z_]\w*)(::[a-z]\w*)*$/" end ILLEGAL_VAR_NAME = hard_issue :ILLEGAL_VAR_NAME, :name do "Illegal variable name, The given name '#{name}' does not conform to the naming rule /^((::)?[a-z]\w*)*((::)?[a-z_]\w*)$/" end ILLEGAL_NUMERIC_VAR_NAME = hard_issue :ILLEGAL_NUMERIC_VAR_NAME, :name do "Illegal numeric variable name, The given name '#{name}' must be a decimal value if it starts with a digit 0-9" end # In case a model is constructed programmatically, it must create valid type references. 
# ILLEGAL_CLASSREF = hard_issue :ILLEGAL_CLASSREF, :name do "Illegal type reference. The given name '#{name}' does not conform to the naming rule" end # This is a runtime issue - storeconfigs must be on in order to collect exported. This issue should be # set to :ignore when just checking syntax. # @todo should be a :warning by default # RT_NO_STORECONFIGS = issue :RT_NO_STORECONFIGS do "You cannot collect exported resources without storeconfigs being set; the collection will be ignored" end # This is a runtime issue - storeconfigs must be on in order to export a resource. This issue should be # set to :ignore when just checking syntax. # @todo should be a :warning by default # RT_NO_STORECONFIGS_EXPORT = issue :RT_NO_STORECONFIGS_EXPORT do "You cannot collect exported resources without storeconfigs being set; the export is ignored" end # A hostname may only contain letters, digits, '_', '-', and '.'. # ILLEGAL_HOSTNAME_CHARS = hard_issue :ILLEGAL_HOSTNAME_CHARS, :hostname do "The hostname '#{hostname}' contains illegal characters (only letters, digits, '_', '-', and '.' are allowed)" end # A hostname may only contain letters, digits, '_', '-', and '.'. # ILLEGAL_HOSTNAME_INTERPOLATION = hard_issue :ILLEGAL_HOSTNAME_INTERPOLATION do "An interpolated expression is not allowed in a hostname of a node" end # Issues when an expression is used where it is not legal. # E.g. an arithmetic expression where a hostname is expected. # ILLEGAL_EXPRESSION = hard_issue :ILLEGAL_EXPRESSION, :feature, :container do "Illegal expression. #{label.a_an_uc(semantic)} is unacceptable as #{feature} in #{label.a_an(container)}" end # Issues when a variable is not a NAME # ILLEGAL_VARIABLE_EXPRESSION = hard_issue :ILLEGAL_VARIABLE_EXPRESSION do "Illegal variable expression. #{label.a_an_uc(semantic)} did not produce a variable name (String or Numeric)." end # Issues when an expression is used illegaly in a query. # query only supports == and !=, and not <, > etc. # ILLEGAL_QUERY_EXPRESSION = hard_issue :ILLEGAL_QUERY_EXPRESSION do "Illegal query expression. #{label.a_an_uc(semantic)} cannot be used in a query" end # If an attempt is made to make a resource default virtual or exported. # NOT_VIRTUALIZEABLE = hard_issue :NOT_VIRTUALIZEABLE do "Resource Defaults are not virtualizable" end # When an attempt is made to use multiple keys (to produce a range in Ruby - e.g. $arr[2,-1]). # This is not supported in 3x, but it allowed in 4x. # UNSUPPORTED_RANGE = issue :UNSUPPORTED_RANGE, :count do "Attempt to use unsupported range in #{label.a_an(semantic)}, #{count} values given for max 1" end ILLEGAL_RELATIONSHIP_OPERAND_TYPE = issue :ILLEGAL_RELATIONSHIP_OPERAND_TYPE, :operand do "Illegal relationship operand, can not form a relationship with #{label.a_an(operand)}. A Catalog type is required." end NOT_CATALOG_TYPE = issue :NOT_CATALOG_TYPE, :type do "Illegal relationship operand, can not form a relationship with something of type #{type}. A Catalog type is required." end BAD_STRING_SLICE_ARITY = issue :BAD_STRING_SLICE_ARITY, :actual do "String supports [] with one or two arguments. Got #{actual}" end BAD_STRING_SLICE_TYPE = issue :BAD_STRING_SLICE_TYPE, :actual do "String-Type [] requires all arguments to be integers (or default). Got #{actual}" end BAD_ARRAY_SLICE_ARITY = issue :BAD_ARRAY_SLICE_ARITY, :actual do "Array supports [] with one or two arguments. Got #{actual}" end BAD_HASH_SLICE_ARITY = issue :BAD_HASH_SLICE_ARITY, :actual do "Hash supports [] with one or more arguments. 
Got #{actual}" end BAD_INTEGER_SLICE_ARITY = issue :BAD_INTEGER_SLICE_ARITY, :actual do "Integer-Type supports [] with one or two arguments (from, to). Got #{actual}" end BAD_INTEGER_SLICE_TYPE = issue :BAD_INTEGER_SLICE_TYPE, :actual do "Integer-Type [] requires all arguments to be integers (or default). Got #{actual}" end BAD_COLLECTION_SLICE_TYPE = issue :BAD_COLLECTION_SLICE_TYPE, :actual do "A Type's size constraint arguments must be a single Integer type, or 1-2 integers (or default). Got #{label.a_an(actual)}" end BAD_FLOAT_SLICE_ARITY = issue :BAD_INTEGER_SLICE_ARITY, :actual do "Float-Type supports [] with one or two arguments (from, to). Got #{actual}" end BAD_FLOAT_SLICE_TYPE = issue :BAD_INTEGER_SLICE_TYPE, :actual do "Float-Type [] requires all arguments to be floats, or integers (or default). Got #{actual}" end BAD_SLICE_KEY_TYPE = issue :BAD_SLICE_KEY_TYPE, :left_value, :expected_classes, :actual do expected_text = if expected_classes.size > 1 "one of #{expected_classes.join(', ')} are" else "#{expected_classes[0]} is" end "#{label.a_an_uc(left_value)}[] cannot use #{actual} where #{expected_text} expected" end BAD_TYPE_SLICE_TYPE = issue :BAD_TYPE_SLICE_TYPE, :base_type, :actual do "#{base_type}[] arguments must be types. Got #{actual}" end BAD_TYPE_SLICE_ARITY = issue :BAD_TYPE_SLICE_ARITY, :base_type, :min, :max, :actual do base_type_label = base_type.is_a?(String) ? base_type : label.a_an_uc(base_type) if max == -1 || max == 1.0 / 0.0 # Infinity "#{base_type_label}[] accepts #{min} or more arguments. Got #{actual}" elsif max && max != min "#{base_type_label}[] accepts #{min} to #{max} arguments. Got #{actual}" else "#{base_type_label}[] accepts #{min} #{label.plural_s(min, 'argument')}. Got #{actual}" end end BAD_TYPE_SPECIALIZATION = hard_issue :BAD_TYPE_SPECIALIZATION, :type, :message do "Error creating type specialization of #{label.a_an(type)}, #{message}" end ILLEGAL_TYPE_SPECIALIZATION = issue :ILLEGAL_TYPE_SPECIALIZATION, :kind do "Cannot specialize an already specialized #{kind} type" end ILLEGAL_RESOURCE_SPECIALIZATION = issue :ILLEGAL_RESOURCE_SPECIALIZATION, :actual do "First argument to Resource[] must be a resource type or a String. Got #{actual}." end EMPTY_RESOURCE_SPECIALIZATION = issue :EMPTY_RESOURCE_SPECIALIZATION do "Arguments to Resource[] are all empty/undefined" end ILLEGAL_HOSTCLASS_NAME = hard_issue :ILLEGAL_HOSTCLASS_NAME, :name do "Illegal Class name in class reference. #{label.a_an_uc(name)} cannot be used where a String is expected" end ILLEGAL_DEFINITION_NAME = hard_issue :ILLEGAL_DEFINTION_NAME, :name do "Unacceptable name. The name '#{name}' is unacceptable as the name of #{label.a_an(semantic)}" end CAPTURES_REST_NOT_LAST = hard_issue :CAPTURES_REST_NOT_LAST, :param_name do "Parameter $#{param_name} is not last, and has 'captures rest'" end CAPTURES_REST_NOT_SUPPORTED = hard_issue :CAPTURES_REST_NOT_SUPPORTED, :container, :param_name do "Parameter $#{param_name} has 'captures rest' - not supported in #{label.a_an(container)}" end REQUIRED_PARAMETER_AFTER_OPTIONAL = hard_issue :REQUIRED_PARAMETER_AFTER_OPTIONAL, :param_name do "Parameter $#{param_name} is required but appears after optional parameters" end MISSING_REQUIRED_PARAMETER = hard_issue :MISSING_REQUIRED_PARAMETER, :param_name do "Parameter $#{param_name} is required but no value was given" end NOT_NUMERIC = issue :NOT_NUMERIC, :value do "The value '#{value}' cannot be converted to Numeric." end UNKNOWN_FUNCTION = issue :UNKNOWN_FUNCTION, :name do "Unknown function: '#{name}'." 
  end

  UNKNOWN_VARIABLE = issue :UNKNOWN_VARIABLE, :name do
    "Unknown variable: '#{name}'."
  end

  RUNTIME_ERROR = issue :RUNTIME_ERROR, :detail do
    "Error while evaluating #{label.a_an(semantic)}, #{detail}"
  end

  UNKNOWN_RESOURCE_TYPE = issue :UNKNOWN_RESOURCE_TYPE, :type_name do
    "Resource type not found: #{type_name.capitalize}"
  end

  ILLEGAL_RESOURCE_TYPE = hard_issue :ILLEGAL_RESOURCE_TYPE, :actual do
    "Illegal Resource Type expression, expected result to be a type name, or untitled Resource, got #{actual}"
  end

  DUPLICATE_TITLE = issue :DUPLICATE_TITLE, :title do
    "The title '#{title}' has already been used in this resource expression"
  end

  DUPLICATE_ATTRIBUTE = issue :DUPLICATE_ATTRIBUTE, :attribute do
    "The attribute '#{attribute}' has already been set in this resource body"
  end

  MISSING_TITLE = hard_issue :MISSING_TITLE do
    "Missing title. The title expression resulted in undef"
  end

  MISSING_TITLE_AT = hard_issue :MISSING_TITLE_AT, :index do
    "Missing title at index #{index}. The title expression resulted in an undef title"
  end

  ILLEGAL_TITLE_TYPE_AT = hard_issue :ILLEGAL_TITLE_TYPE_AT, :index, :actual do
    "Illegal title type at index #{index}. Expected String, got #{actual}"
  end

  EMPTY_STRING_TITLE_AT = hard_issue :EMPTY_STRING_TITLE_AT, :index do
    "Empty string title at #{index}. Title strings must have a length greater than zero."
  end

  UNKNOWN_RESOURCE = issue :UNKNOWN_RESOURCE, :type_name, :title do
    "Resource not found: #{type_name.capitalize}['#{title}']"
  end

  UNKNOWN_RESOURCE_PARAMETER = issue :UNKNOWN_RESOURCE_PARAMETER, :type_name, :title, :param_name do
    "The resource #{type_name.capitalize}['#{title}'] does not have a parameter called '#{param_name}'"
  end

  DIV_BY_ZERO = hard_issue :DIV_BY_ZERO do
    "Division by 0"
  end

  RESULT_IS_INFINITY = hard_issue :RESULT_IS_INFINITY, :operator do
    "The result of the #{operator} expression is Infinity"
  end

  # TODO_HEREDOC
  EMPTY_HEREDOC_SYNTAX_SEGMENT = issue :EMPTY_HEREDOC_SYNTAX_SEGMENT, :syntax do
    "Heredoc syntax specification has empty segment between '+' : '#{syntax}'"
  end

  ILLEGAL_EPP_PARAMETERS = issue :ILLEGAL_EPP_PARAMETERS do
    "Ambiguous EPP parameter expression. Probably missing '<%-' before parameters to remove leading whitespace"
  end

  DISCONTINUED_IMPORT = hard_issue :DISCONTINUED_IMPORT do
    "Use of 'import' has been discontinued in favor of a manifest directory. See http://links.puppetlabs.com/puppet-import-deprecation"
  end

  IDEM_EXPRESSION_NOT_LAST = issue :IDEM_EXPRESSION_NOT_LAST do
    "This #{label.label(semantic)} has no effect. A value-producing expression without other effect may only be placed last in a block/sequence"
  end

  IDEM_NOT_ALLOWED_LAST = hard_issue :IDEM_NOT_ALLOWED_LAST, :container do
    "This #{label.label(semantic)} has no effect. #{label.a_an_uc(container)} can not end with a value-producing expression without other effect"
  end

  RESERVED_WORD = hard_issue :RESERVED_WORD, :word do
    "Use of reserved word: #{word}, must be quoted if intended to be a String value"
  end

  RESERVED_TYPE_NAME = hard_issue :RESERVED_TYPE_NAME, :name do
    "The name: '#{name}' is already defined by Puppet and can not be used as the name of #{label.a_an(semantic)}."
  end

  UNMATCHED_SELECTOR = hard_issue :UNMATCHED_SELECTOR, :param_value do
    "No matching entry for selector parameter with value '#{param_value}'"
  end

  ILLEGAL_NODE_INHERITANCE = issue :ILLEGAL_NODE_INHERITANCE do
    "Node inheritance is not supported in Puppet >= 4.0.0.
See http://links.puppetlabs.com/puppet-node-inheritance-deprecation" end ILLEGAL_OVERRIDEN_TYPE = issue :ILLEGAL_OVERRIDEN_TYPE, :actual do "Resource Override can only operate on resources, got: #{label.label(actual)}" end RESERVED_PARAMETER = hard_issue :RESERVED_PARAMETER, :container, :param_name do "The parameter $#{param_name} redefines a built in parameter in #{label.the(container)}" end TYPE_MISMATCH = hard_issue :TYPE_MISMATCH, :expected, :actual do "Expected value of type #{expected}, got #{actual}" end MULTIPLE_ATTRIBUTES_UNFOLD = hard_issue :MULTIPLE_ATTRIBUTES_UNFOLD do "Unfolding of attributes from Hash can only be used once per resource body" end SYNTAX_ERROR = hard_issue :SYNTAX_ERROR, :where do "Syntax error at #{where}" end - LEX_ERROR = hard_issue :LEX_ERROR do - # Error here for completeness. It's never printed - "Unable to create lexical token stream" + ILLEGAL_CLASS_REFERENCE = hard_issue :ILLEGAL_CLASS_REFERENCE do + 'Illegal class reference' + end + + ILLEGAL_FULLY_QUALIFIED_CLASS_REFERENCE = hard_issue :ILLEGAL_FULLY_QUALIFIED_CLASS_REFERENCE do + 'Illegal fully qualified class reference' + end + + ILLEGAL_FULLY_QUALIFIED_NAME = hard_issue :ILLEGAL_FULLY_QUALIFIED_NAME do + 'Illegal fully qualified name' + end + + ILLEGAL_NAME_OR_BARE_WORD = hard_issue :ILLEGAL_NAME_OR_BARE_WORD do + 'Illegal name or bare word' + end + + ILLEGAL_NUMBER = hard_issue :ILLEGAL_NUMBER do + 'Illegal number' end ILLEGAL_UNICODE_ESCAPE = issue :ILLEGAL_UNICODE_ESCAPE do "Unicode escape '\\u' was not followed by 4 hex digits" end + INVALID_HEX_NUMBER = hard_issue :INVALID_HEX_NUMBER, :value do + "Not a valid hex number #{value}" + end + + INVALID_OCTAL_NUMBER = hard_issue :INVALID_OCTAL_NUMBER, :value do + "Not a valid octal number #{value}" + end + + INVALID_DECIMAL_NUMBER = hard_issue :INVALID_DECIMAL_NUMBER, :value do + "Not a valid decimal number #{value}" + end + + NO_INPUT_TO_LEXER = hard_issue :NO_INPUT_TO_LEXER do + "Internal Error: No string or file given to lexer to process." + end + UNRECOGNIZED_ESCAPE = issue :UNRECOGNIZED_ESCAPE, :ch do "Unrecognized escape sequence '\\#{ch}'" end + + UNRECOGNIZED_QUOTE = hard_issue :UNRECOGNIZED_QUOTE, :followed_by do + "Unclosed quote after \"'\" followed by '#{followed_by}'" + end + + UNCLOSED_QUOTE = hard_issue :UNCLOSED_QUOTE, :after, :followed_by do + "Unclosed quote after #{after} followed by '#{followed_by}'" + end + + EPP_INTERNAL_ERROR = hard_issue :EPP_INTERNAL_ERROR, :error do + "Internal error: #{error}" + end + + EPP_UNBALANCED_TAG = hard_issue :EPP_UNBALANCED_TAG do + 'Unbalanced epp tag, reached without closing tag.' + end + + EPP_UNBALANCED_COMMENT = hard_issue :EPP_UNBALANCED_COMMENT do + 'Reaching end after opening <%# without seeing %>' + end + + EPP_UNBALANCED_EXPRESSION = hard_issue :EPP_UNBALANCED_EXPRESSION do + 'Unbalanced embedded expression - opening <% and reaching end of input' + end + + HEREDOC_UNCLOSED_PARENTHESIS = hard_issue :HEREDOC_UNCLOSED_PARENTHESIS, :followed_by do + "Unclosed parenthesis after '@(' followed by '#{followed_by}'" + end + + HEREDOC_WITHOUT_END_TAGGED_LINE = hard_issue :HEREDOC_WITHOUT_END_TAGGED_LINE do + 'Heredoc without end-tagged line' + end + + HEREDOC_MISSING_END_TAG = hard_issue :HEREDOC_MISSING_END_TAG do + 'Missing end tag in heredoc' + end + + HEREDOC_INVALID_ESCAPE = hard_issue :HEREDOC_INVALID_ESCAPE, :actual do + "Invalid heredoc escape char. Only t, r, n, s, u, L, $ allowed. 
Got '#{actual}'" + end + + HEREDOC_INVALID_SYNTAX = hard_issue :HEREDOC_INVALID_SYNTAX do + 'Invalid syntax in heredoc expected @(endtag[:syntax][/escapes])' + end + + HEREDOC_WITHOUT_TEXT = hard_issue :HEREDOC_WITHOUT_TEXT do + 'Heredoc without any following lines of text' + end + + HEREDOC_MULTIPLE_AT_ESCAPES = hard_issue :HEREDOC_MULTIPLE_AT_ESCAPES, :escapes do + "An escape char for @() may only appear once. Got '#{escapes.join(', ')}'" + end end diff --git a/lib/puppet/pops/parser/epp_support.rb b/lib/puppet/pops/parser/epp_support.rb index 9329ec0b8..1fa533cf9 100644 --- a/lib/puppet/pops/parser/epp_support.rb +++ b/lib/puppet/pops/parser/epp_support.rb @@ -1,247 +1,256 @@ # This module is an integral part of the Lexer. # It handles scanning of EPP (Embedded Puppet), a form of string/expression interpolation similar to ERB. # require 'strscan' module Puppet::Pops::Parser::EppSupport TOKEN_RENDER_STRING = [:RENDER_STRING, nil, 0] TOKEN_RENDER_EXPR = [:RENDER_EXPR, nil, 0] # Scans all of the content and returns it in an array # Note that the terminating [false, false] token is included in the result. # def fullscan_epp result = [] scan_epp {|token, value| result.push([token, value]) } result end # A block must be passed to scan. It will be called with two arguments, a symbol for the token, # and an instance of LexerSupport::TokenValue # PERFORMANCE NOTE: The TokenValue is designed to reduce the amount of garbage / temporary data # and to only convert the lexer's internal tokens on demand. It is slightly more costly to create an # instance of a class defined in Ruby than an Array or Hash, but the gain is much bigger since transformation # logic is avoided for many of its members (most are never used (e.g. line/pos information which is only of # value in general for error messages, and for some expressions (which the lexer does not know about). # def scan_epp # PERFORMANCE note: it is faster to access local variables than instance variables. # This makes a small but notable difference since instance member access is avoided for # every token in the lexed content. # scn = @scanner ctx = @lexing_context queue = @token_queue - lex_error "Internal Error: No string or file given to lexer to process." unless scn + lex_error(Puppet::Pops::Issues::EPP_INTERNAL_ERROR, :error => 'No string or file given to lexer to process.') unless scn ctx[:epp_mode] = :text enqueue_completed([:EPP_START, nil, 0], 0) interpolate_epp # This is the lexer's main loop until queue.empty? && scn.eos? do if token = queue.shift || lex_token yield [ ctx[:after] = token[0], token[1] ] end end if ctx[:epp_open_position] - lex_error("Unbalanced epp tag, reached without closing tag.", ctx[:epp_position]) + lex_error(Puppet::Pops::Issues::EPP_UNBALANCED_TAG, {}, ctx[:epp_position]) end # Signals end of input yield [false, false] end def interpolate_epp(skip_leading=false) scn = @scanner ctx = @lexing_context eppscanner = EppScanner.new(scn) before = scn.pos s = eppscanner.scan(skip_leading) case eppscanner.mode when :text # Should be at end of scan, or something is terribly wrong - lex_error("Internal error: template scanner returns text mode and is not and end of input") unless @scanner.eos? + unless @scanner.eos? + lex_error(Puppet::Pops::Issues::EPP_INTERNAL_ERROR, :error => 'template scanner returns text mode and is not and end of input') + end if s # s may be nil if scanned text ends with an epp tag (i.e. no trailing text). 
        enqueue_completed([:RENDER_STRING, s, scn.pos - before], before)
      end
      ctx[:epp_open_position] = nil
      # do nothing else, scanner is at the end

    when :error
-      lex_error(eppscanner.message())
+      lex_error(eppscanner.issue)

    when :epp
      # It is meaningless to render empty string segments, and it is harmful to do this at
      # the start of the scan as it prevents specification of parameters with <%- ($x, $y) -%>
      #
      if s && s.length > 0
        enqueue_completed([:RENDER_STRING, s, scn.pos - before], before)
      end
      # switch epp_mode to general (embedded) pp logic (non rendered result)
      ctx[:epp_mode] = :epp
      ctx[:epp_open_position] = scn.pos

    when :expr
      # It is meaningless to render an empty string segment
      if s && s.length > 0
        enqueue_completed([:RENDER_STRING, s, scn.pos - before], before)
      end
      enqueue_completed(TOKEN_RENDER_EXPR, before)
      # switch mode to "epp expr interpolation"
      ctx[:epp_mode] = :expr
      ctx[:epp_open_position] = scn.pos
    else
-      lex_error("Internal Error, Unknown mode #{eppscanner.mode} returned by template scanner")
+      lex_error(Puppet::Pops::Issues::EPP_INTERNAL_ERROR, :error => "Unknown mode #{eppscanner.mode} returned by template scanner")
    end
    nil
  end

  # A scanner specialized in processing text with embedded EPP (Embedded Puppet) tags.
  # The scanner is initialized with a StringScanner which it mutates as scanning takes place.
  # The intent is to use one instance of EppScanner per wanted scan, and this instance represents
  # the state after the scan.
  #
  # @example Sample usage
  #   a = "some text <% pp code %> some more text"
  #   scan = StringScanner.new(a)
  #   eppscan = EppScanner.new(scan)
  #   str = eppscan.scan
  #   eppscan.mode # => :epp
  #   eppscan.lines # => 0
  #   eppscan
  #
  # The scanner supports
  # * scanning text until <%, <%-, <%=
  # * while scanning text:
  #   * tokens <%% and %%> are translated to <% and %> respectively and are returned as text.
  #   * tokens <%# and %> (or ending with -%>) and the enclosed text is a comment and is not included in the returned text
  #   * text following a comment that ends with -%> gets trailing whitespace (up to and including a line break) trimmed
  #     and this whitespace is not included in the returned text.
  # * The continuation {#mode} is set to one of:
  #   * `:epp` - for a <% token
  #   * `:expr` - for a <%= token
  #   * `:text` - when there was no continuation mode (e.g. when input ends with text)
  #   * `:error` - if the tokens are unbalanced (reaching the end without a closing matching token). An error message
  #     is then also available via the method {#message}.
  #
  # Note that the intent is to use this specialized scanner to scan the text parts, when continuation mode is `:epp` or `:expr`
  # the pp lexer should advance scanning (using the string scanner) until it reaches and consumes a `-%>` or `%>` token. If it
  # finds a `-%>` token it should pass this on as a `skip_leading` parameter when it performs the next {#scan}.
  #
  class EppScanner
    # The original scanner used by the lexer/container using EppScanner
    attr_reader :scanner

    # The resulting mode after the scan.
    # The mode is one of `:text` (the initial mode), `:epp` embedded code (no output), `:expr` (embedded
    # expression), or `:error`
    #
    attr_reader :mode

-    # An error message if `mode == :error`, `nil` otherwise.
-    attr_reader :message
+    # An error issue if `mode == :error`, `nil` otherwise.
+    attr_reader :issue

    # If the first scan should skip leading whitespace (typically detected by the pp lexer when the
    # pp mode end-token is found (i.e. `-%>`) and then passed on to the scanner).
# attr_reader :skip_leading # Creates an EppScanner based on a StringScanner that represents the state where EppScanner should start scanning. # The given scanner will be mutated (i.e. position moved) to reflect the EppScanner's end state after a scan. # def initialize(scanner) @scanner = scanner end + # Here for backwards compatibility. + # @deprecated Use issue instead + # @return [String] the issue message + def message + @issue.nil? ? nil : @issue.format + end + # Scans from the current position in the configured scanner, advances this scanner's position until the end # of the input, or to the first position after a mode switching token (`<%`, `<%-` or `<%=`). Number of processed # lines and continuation mode can be obtained via {#lines}, and {#mode}. # # @return [String, nil] the scanned and processed text, or nil if at the end of the input. # def scan(skip_leading=false) @mode = :text @skip_leading = skip_leading return nil if scanner.eos? s = "" until scanner.eos? part = @scanner.scan_until(/(<%)|\z/) if @skip_leading part.gsub!(/^[ \t]*\r?\n?/,'') @skip_leading = false end # The spec for %%> is to transform it into a literal %>. This is done here, as %%> otherwise would go # undetected in text mode. (i.e. it is not really necessary to escape %> with %%> in text mode unless # adding checks stating that a literal %> is illegal in text (unbalanced). # part.gsub!(/%%>/, '%>') s += part case @scanner.peek(1) when "" # at the end # if s ends with <% then this is an error (unbalanced <% %>) if s.end_with? "<%" @mode = :error - @message = "Unbalanced embedded expression - opening <% and reaching end of input" + @issue = Puppet::Pops::Issues::EPP_UNBALANCED_EXPRESSION else mode = :epp end return s when "-" # trim trailing whitespace on same line from accumulated s # return text and signal switch to pp mode @scanner.getch # drop the - s.gsub!(/\r?\n?[ \t]*<%\z/, '') @mode = :epp return s when "%" # verbatim text # keep the scanned <%, and continue scanning after skipping one % # (i.e. do nothing here) @scanner.getch # drop the % to get a literal <% in the output when "=" # expression # return text and signal switch to expression mode # drop the scanned <%, and skip past -%>, or %>, but also skip %%> @scanner.getch # drop the = s.slice!(-2..-1) @mode = :expr return s when "#" # template comment # drop the scanned <%, and skip past -%>, or %>, but also skip %%> s.slice!(-2..-1) # unless there is an immediate termination i.e. <%#%> scan for the next %> that is not # preceded by a % (i.e. 
skip %%>)
          part = scanner.scan_until(/[^%]%>/)
          unless part
-            @message = "Reaching end after opening <%# without seeing %>"
+            @issue = Puppet::Pops::Issues::EPP_UNBALANCED_COMMENT
            @mode = :error
            return s
          end
          @skip_leading = true if part.end_with?("-%>")
          # Continue scanning for more text

        else
          # Switch to pp after having removed the <%
          s.slice!(-2..-1)
          @mode = :epp
          return s
        end
      end
    end
  end
end
diff --git a/lib/puppet/pops/parser/heredoc_support.rb b/lib/puppet/pops/parser/heredoc_support.rb
index 9b669ae5b..1c32ee623 100644
--- a/lib/puppet/pops/parser/heredoc_support.rb
+++ b/lib/puppet/pops/parser/heredoc_support.rb
@@ -1,140 +1,139 @@
module Puppet::Pops::Parser::HeredocSupport

  # Pattern for heredoc `@(endtag[:syntax][/escapes])`
  # Produces groups for endtag (group 1), syntax (group 2), and escapes (group 3)
  #
  PATTERN_HEREDOC = %r{@\(([^:/\r\n\)]+)(?::[:blank:]*([a-z][a-zA-Z0-9_+]+)[:blank:]*)?(?:/((?:\w|[$])*)[:blank:]*)?\)}

  def heredoc
    scn = @scanner
    ctx = @lexing_context
    locator = @locator
    before = scn.pos

    # scanner is at position before @(
    # find end of the heredoc spec
-    str = scn.scan_until(/\)/) || lexer.lex_error("Unclosed parenthesis after '@(' followed by '#{followed_by}'")
+    str = scn.scan_until(/\)/) || lex_error(Puppet::Pops::Issues::HEREDOC_UNCLOSED_PARENTHESIS, :followed_by => followed_by)
    pos_after_heredoc = scn.pos

    # Note: allows '+' as separator in syntax, but this needs validation as empty segments are not allowed
-    unless md = str.match(PATTERN_HEREDOC)
-      lex_error("Invalid syntax in heredoc expected @(endtag[:syntax][/escapes])")
-    end
+    md = str.match(PATTERN_HEREDOC)
+    lex_error(Puppet::Pops::Issues::HEREDOC_INVALID_SYNTAX) unless md
    endtag = md[1]
    syntax = md[2] || ''
    escapes = md[3]

    endtag.strip!

    # Is this a dq string style heredoc? (endtag enclosed in "")
    if endtag =~ /^"(.*)"$/
      dqstring_style = true
      endtag = $1.strip
    end

-    lexer.lex_error("Missing endtag in heredoc") unless endtag.length >= 1
+    lex_error(Puppet::Pops::Issues::HEREDOC_MISSING_END_TAG) unless endtag.length >= 1

    resulting_escapes = []
    if escapes
      escapes = "trnsuL$" if escapes.length < 1
      escapes = escapes.split('')
      unless escapes.length == escapes.uniq.length
-        lex_error("An escape char for @() may only appear once. Got '#{escapes.join(', ')}")
+        lex_error(Puppet::Pops::Issues::HEREDOC_MULTIPLE_AT_ESCAPES, :escapes => escapes)
      end
      resulting_escapes = ["\\"]
      escapes.each do |e|
        case e
        when "t", "r", "n", "s", "u", "$"
          resulting_escapes << e
        when "L"
          resulting_escapes += ["\n", "\r\n"]
        else
-          lex_error("Invalid heredoc escape char. Only t, r, n, s, u, L, $ allowed. Got '#{e}'")
+          lex_error(Puppet::Pops::Issues::HEREDOC_INVALID_ESCAPE, :actual => e)
        end
      end
    end

    # Produce a heredoc token to make the syntax available to the grammar
    enqueue_completed([:HEREDOC, syntax, pos_after_heredoc - before], before)

    # If this is the second or subsequent heredoc on the line, the lexing context's :newline_jump contains
    # the position after the \n where the next heredoc text should scan. If not set, this is the first
    # and it should start scanning after the first found \n (or if not found == error).
if ctx[:newline_jump] scn.pos = ctx[:newline_jump] else - scn.scan_until(/\n/) || lex_error("Heredoc without any following lines of text") + scn.scan_until(/\n/) || lex_error(Puppet::Pops::Issues::HEREDOC_WITHOUT_TEXT) end # offset 0 for the heredoc, and its line number heredoc_offset = scn.pos heredoc_line = locator.line_for_offset(heredoc_offset)-1 # Compute message to emit if there is no end (to make it refer to the opening heredoc position). - eof_error = create_lex_error("Heredoc without end-tagged line") + eof_error = create_lex_error(Puppet::Pops::Issues::HEREDOC_WITHOUT_END_TAGGED_LINE) # Text from this position (+ lexing contexts offset for any preceding heredoc) is heredoc until a line # that terminates the heredoc is found. # (Endline in EBNF form): WS* ('|' WS*)? ('-' WS*)? endtag WS* \r? (\n|$) endline_pattern = /([[:blank:]]*)(?:([|])[[:blank:]]*)?(?:(\-)[[:blank:]]*)?#{Regexp.escape(endtag)}[[:blank:]]*\r?(?:\n|\z)/ lines = [] while !scn.eos? do one_line = scn.scan_until(/(?:\n|\z)/) raise eof_error unless one_line if md = one_line.match(endline_pattern) leading = md[1] has_margin = md[2] == '|' remove_break = md[3] == '-' # Record position where next heredoc (from same line as current @()) should start scanning for content ctx[:newline_jump] = scn.pos # Process captured lines - remove leading, and trailing newline str = heredoc_text(lines, leading, has_margin, remove_break) # Use a new lexer instance configured with a sub-locator to enable correct positioning sublexer = self.class.new() locator = Puppet::Pops::Parser::Locator::SubLocator.sub_locator(str, locator.file, heredoc_line, heredoc_offset, leading.length()) # Emit a token that provides the grammar with location information about the lines on which the heredoc # content is based. enqueue([:SUBLOCATE, Puppet::Pops::Parser::LexerSupport::TokenValue.new([:SUBLOCATE, lines, lines.reduce(0) {|size, s| size + s.length} ], heredoc_offset, locator)]) sublexer.lex_unquoted_string(str, locator, resulting_escapes, dqstring_style) sublexer.interpolate_uq_to(self) # Continue scan after @(...) scn.pos = pos_after_heredoc return else lines << one_line end end raise eof_error end # Produces the heredoc text string given the individual (unprocessed) lines as an array. # @param lines [Array] unprocessed lines of text in the heredoc w/o terminating line # @param leading [String] the leading text up (up to pipe or other terminating char) # @param has_margin [Boolean] if the left margin should be adjusted as indicated by `leading` # @param remove_break [Boolean] if the line break (\r?\n) at the end of the last line should be removed or not # def heredoc_text(lines, leading, has_margin, remove_break) if has_margin leading_pattern = /^#{Regexp.escape(leading)}/ lines = lines.collect {|s| s.gsub(leading_pattern, '') } end result = lines.join('') result.gsub!(/\r?\n$/, '') if remove_break result end end diff --git a/lib/puppet/pops/parser/lexer2.rb b/lib/puppet/pops/parser/lexer2.rb index 7ac476d36..2193ce8fb 100644 --- a/lib/puppet/pops/parser/lexer2.rb +++ b/lib/puppet/pops/parser/lexer2.rb @@ -1,694 +1,694 @@ # The Lexer is responsbile for turning source text into tokens. # This version is a performance enhanced lexer (in comparison to the 3.x and earlier "future parser" lexer. 
# # Old returns tokens [:KEY, value, { locator = } # Could return [[token], locator] # or Token.new([token], locator) with the same API x[0] = token_symbol, x[1] = self, x[:key] = (:value, :file, :line, :pos) etc require 'strscan' require 'puppet/pops/parser/lexer_support' require 'puppet/pops/parser/heredoc_support' require 'puppet/pops/parser/interpolation_support' require 'puppet/pops/parser/epp_support' require 'puppet/pops/parser/slurp_support' class Puppet::Pops::Parser::Lexer2 include Puppet::Pops::Parser::LexerSupport include Puppet::Pops::Parser::HeredocSupport include Puppet::Pops::Parser::InterpolationSupport include Puppet::Pops::Parser::SlurpSupport include Puppet::Pops::Parser::EppSupport # ALl tokens have three slots, the token name (a Symbol), the token text (String), and a token text length. # All operator and punctuation tokens reuse singleton arrays Tokens that require unique values create # a unique array per token. # # PEFORMANCE NOTES: # This construct reduces the amount of object that needs to be created for operators and punctuation. # The length is pre-calculated for all singleton tokens. The length is used both to signal the length of # the token, and to advance the scanner position (without having to advance it with a scan(regexp)). # TOKEN_LBRACK = [:LBRACK, '['.freeze, 1].freeze TOKEN_LISTSTART = [:LISTSTART, '['.freeze, 1].freeze TOKEN_RBRACK = [:RBRACK, ']'.freeze, 1].freeze TOKEN_LBRACE = [:LBRACE, '{'.freeze, 1].freeze TOKEN_RBRACE = [:RBRACE, '}'.freeze, 1].freeze TOKEN_SELBRACE = [:SELBRACE, '{'.freeze, 1].freeze TOKEN_LPAREN = [:LPAREN, '('.freeze, 1].freeze TOKEN_RPAREN = [:RPAREN, ')'.freeze, 1].freeze TOKEN_EQUALS = [:EQUALS, '='.freeze, 1].freeze TOKEN_APPENDS = [:APPENDS, '+='.freeze, 2].freeze TOKEN_DELETES = [:DELETES, '-='.freeze, 2].freeze TOKEN_ISEQUAL = [:ISEQUAL, '=='.freeze, 2].freeze TOKEN_NOTEQUAL = [:NOTEQUAL, '!='.freeze, 2].freeze TOKEN_MATCH = [:MATCH, '=~'.freeze, 2].freeze TOKEN_NOMATCH = [:NOMATCH, '!~'.freeze, 2].freeze TOKEN_GREATEREQUAL = [:GREATEREQUAL, '>='.freeze, 2].freeze TOKEN_GREATERTHAN = [:GREATERTHAN, '>'.freeze, 1].freeze TOKEN_LESSEQUAL = [:LESSEQUAL, '<='.freeze, 2].freeze TOKEN_LESSTHAN = [:LESSTHAN, '<'.freeze, 1].freeze TOKEN_FARROW = [:FARROW, '=>'.freeze, 2].freeze TOKEN_PARROW = [:PARROW, '+>'.freeze, 2].freeze TOKEN_LSHIFT = [:LSHIFT, '<<'.freeze, 2].freeze TOKEN_LLCOLLECT = [:LLCOLLECT, '<<|'.freeze, 3].freeze TOKEN_LCOLLECT = [:LCOLLECT, '<|'.freeze, 2].freeze TOKEN_RSHIFT = [:RSHIFT, '>>'.freeze, 2].freeze TOKEN_RRCOLLECT = [:RRCOLLECT, '|>>'.freeze, 3].freeze TOKEN_RCOLLECT = [:RCOLLECT, '|>'.freeze, 2].freeze TOKEN_PLUS = [:PLUS, '+'.freeze, 1].freeze TOKEN_MINUS = [:MINUS, '-'.freeze, 1].freeze TOKEN_DIV = [:DIV, '/'.freeze, 1].freeze TOKEN_TIMES = [:TIMES, '*'.freeze, 1].freeze TOKEN_MODULO = [:MODULO, '%'.freeze, 1].freeze TOKEN_NOT = [:NOT, '!'.freeze, 1].freeze TOKEN_DOT = [:DOT, '.'.freeze, 1].freeze TOKEN_PIPE = [:PIPE, '|'.freeze, 1].freeze TOKEN_AT = [:AT , '@'.freeze, 1].freeze TOKEN_ATAT = [:ATAT , '@@'.freeze, 2].freeze TOKEN_COLON = [:COLON, ':'.freeze, 1].freeze TOKEN_COMMA = [:COMMA, ','.freeze, 1].freeze TOKEN_SEMIC = [:SEMIC, ';'.freeze, 1].freeze TOKEN_QMARK = [:QMARK, '?'.freeze, 1].freeze TOKEN_TILDE = [:TILDE, '~'.freeze, 1].freeze # lexed but not an operator in Puppet TOKEN_REGEXP = [:REGEXP, nil, 0].freeze TOKEN_IN_EDGE = [:IN_EDGE, '->'.freeze, 2].freeze TOKEN_IN_EDGE_SUB = [:IN_EDGE_SUB, '~>'.freeze, 2].freeze TOKEN_OUT_EDGE = [:OUT_EDGE, '<-'.freeze, 2].freeze TOKEN_OUT_EDGE_SUB 
= [:OUT_EDGE_SUB, '<~'.freeze, 2].freeze

  # Tokens that are always unique to what has been lexed
  TOKEN_STRING = [:STRING, nil, 0].freeze
  TOKEN_WORD = [:WORD, nil, 0].freeze
  TOKEN_DQPRE = [:DQPRE, nil, 0].freeze
  TOKEN_DQMID = [:DQMID, nil, 0].freeze
  TOKEN_DQPOS = [:DQPOST, nil, 0].freeze
  TOKEN_NUMBER = [:NUMBER, nil, 0].freeze
  TOKEN_VARIABLE = [:VARIABLE, nil, 1].freeze
  TOKEN_VARIABLE_EMPTY = [:VARIABLE, ''.freeze, 1].freeze

  # HEREDOC has syntax as an argument.
  TOKEN_HEREDOC = [:HEREDOC, nil, 0].freeze

  # EPP_START is currently a marker token, may later get syntax
  TOKEN_EPPSTART = [:EPP_START, nil, 0].freeze
  TOKEN_EPPEND = [:EPP_END, '%>', 2].freeze
  TOKEN_EPPEND_TRIM = [:EPP_END_TRIM, '-%>', 3].freeze

  # This is used for unrecognized tokens, will always be a single character. This particular instance
  # is not used, but is kept here for documentation purposes.
  TOKEN_OTHER = [:OTHER, nil, 0]

  # Keywords are all singleton tokens with pre calculated lengths.
  # Booleans are pre-calculated (rather than evaluating the strings "false" "true" repeatedly).
  #
  KEYWORDS = {
    "case" => [:CASE, 'case', 4],
    "class" => [:CLASS, 'class', 5],
    "default" => [:DEFAULT, 'default', 7],
    "define" => [:DEFINE, 'define', 6],
    "if" => [:IF, 'if', 2],
    "elsif" => [:ELSIF, 'elsif', 5],
    "else" => [:ELSE, 'else', 4],
    "inherits" => [:INHERITS, 'inherits', 8],
    "node" => [:NODE, 'node', 4],
    "and" => [:AND, 'and', 3],
    "or" => [:OR, 'or', 2],
    "undef" => [:UNDEF, 'undef', 5],
    "false" => [:BOOLEAN, false, 5],
    "true" => [:BOOLEAN, true, 4],
    "in" => [:IN, 'in', 2],
    "unless" => [:UNLESS, 'unless', 6],
    "function" => [:FUNCTION, 'function', 8],
    "type" => [:TYPE, 'type', 4],
    "attr" => [:ATTR, 'attr', 4],
    "private" => [:PRIVATE, 'private', 7],
  }
  KEYWORDS.each {|k,v| v[1].freeze; v.freeze }
  KEYWORDS.freeze

  # Reverse lookup of keyword name to string
  KEYWORD_NAMES = {}
  KEYWORDS.each {|k, v| KEYWORD_NAMES[v[0]] = k }
  KEYWORD_NAMES.freeze

  PATTERN_WS = %r{[[:blank:]\r]+}

  # The single line comment includes the line ending.
  PATTERN_COMMENT = %r{#.*\r?}
  PATTERN_MLCOMMENT = %r{/\*(.*?)\*/}m

  PATTERN_REGEX = %r{/[^/\n]*/}
  PATTERN_REGEX_END = %r{/}
  PATTERN_REGEX_A = %r{\A/} # for replacement to ""
  PATTERN_REGEX_Z = %r{/\Z} # for replacement to ""
  PATTERN_REGEX_ESC = %r{\\/} # for replacement to "/"

  # The 3x patterns:
  # PATTERN_CLASSREF = %r{((::){0,1}[A-Z][-\w]*)+}
  # PATTERN_NAME = %r{((::)?[a-z0-9][-\w]*)(::[a-z0-9][-\w]*)*}

  # The NAME and CLASSREF in 4x are strict. Each segment must start with
  # a letter a-z and may not contain dashes (\w includes letters, digits and _).
  #
  PATTERN_CLASSREF = %r{((::){0,1}[A-Z][\w]*)+}
  PATTERN_NAME = %r{^((::)?[a-z][\w]*)(::[a-z][\w]*)*$}

  PATTERN_BARE_WORD = %r{((?:::){0,1}(?:[a-z_](?:[\w-]*[\w])?))+}

  PATTERN_DOLLAR_VAR = %r{\$(::)?(\w+::)*\w+}
  PATTERN_NUMBER = %r{\b(?:0[xX][0-9A-Fa-f]+|0?\d+(?:\.\d+)?(?:[eE]-?\d+)?)\b}

  # PERFORMANCE NOTE:
  # Comparison against a frozen string is faster (than unfrozen).
  #
  STRING_BSLASH_BSLASH = '\\'.freeze

  attr_reader :locator

  def initialize()
  end

  # Clears the lexer state (it is not required to call this as it will be garbage collected
  # and the next lex call (lex_string, lex_file) will reset the internal state).
  #
  def clear()
    # not really needed, but if someone wants to ensure garbage is collected as early as possible
    @scanner = nil
    @locator = nil
    @lexing_context = nil
  end

  # Convenience method, and for compatibility with older lexer. Use the lex_string instead which allows
  # passing the path to use without first having to call file= (which reads the file if it exists).
# (Bad form to use overloading of assignment operator for something that is not really an assignment. Also, # overloading of = does not allow passing more than one argument). # def string=(string) lex_string(string, '') end def lex_string(string, path='') initvars @scanner = StringScanner.new(string) @locator = Puppet::Pops::Parser::Locator.locator(string, path) end # Lexes an unquoted string. # @param string [String] the string to lex # @param locator [Puppet::Pops::Parser::Locator] the locator to use (a default is used if nil is given) # @param escapes [Array] array of character strings representing the escape sequences to transform # @param interpolate [Boolean] whether interpolation of expressions should be made or not. # def lex_unquoted_string(string, locator, escapes, interpolate) initvars @scanner = StringScanner.new(string) @locator = locator || Puppet::Pops::Parser::Locator.locator(string, '') @lexing_context[:escapes] = escapes || UQ_ESCAPES @lexing_context[:uq_slurp_pattern] = (interpolate || !escapes.empty?) ? SLURP_UQ_PATTERN : SLURP_ALL_PATTERN end # Convenience method, and for compatibility with older lexer. Use the lex_file instead. # (Bad form to use overloading of assignment operator for something that is not really an assignment). # def file=(file) lex_file(file) end # TODO: This method should not be used, callers should get the locator since it is most likely required to # compute line, position etc given offsets. # def file @locator ? @locator.file : nil end # Initializes lexing of the content of the given file. An empty string is used if the file does not exist. # def lex_file(file) initvars contents = Puppet::FileSystem.exist?(file) ? Puppet::FileSystem.read(file) : "" @scanner = StringScanner.new(contents.freeze) @locator = Puppet::Pops::Parser::Locator.locator(contents, file) end def initvars @token_queue = [] # NOTE: additional keys are used; :escapes, :uq_slurp_pattern, :newline_jump, :epp_* @lexing_context = { :brace_count => 0, :after => nil, } end # Scans all of the content and returns it in an array # Note that the terminating [false, false] token is included in the result. # def fullscan result = [] scan {|token, value| result.push([token, value]) } result end # A block must be passed to scan. It will be called with two arguments, a symbol for the token, # and an instance of LexerSupport::TokenValue # PERFORMANCE NOTE: The TokenValue is designed to reduce the amount of garbage / temporary data # and to only convert the lexer's internal tokens on demand. It is slightly more costly to create an # instance of a class defined in Ruby than an Array or Hash, but the gain is much bigger since transformation # logic is avoided for many of its members (most are never used (e.g. line/pos information which is only of # value in general for error messages, and for some expressions (which the lexer does not know about). # def scan # PERFORMANCE note: it is faster to access local variables than instance variables. # This makes a small but notable difference since instance member access is avoided for # every token in the lexed content. # scn = @scanner ctx = @lexing_context queue = @token_queue - lex_error_without_pos("Internal Error: No string or file given to lexer to process.") unless scn + lex_error_without_pos(Puppet::Pops::Issues::NO_INPUT_TO_LEXER) unless scn scn.skip(PATTERN_WS) # This is the lexer's main loop until queue.empty? && scn.eos? 
do if token = queue.shift || lex_token ctx[:after] = token[0] yield token end end # Signals end of input yield [false, false] end # This lexes one token at the current position of the scanner. # PERFORMANCE NOTE: Any change to this logic should be performance measured. # def lex_token # Using three char look ahead (may be faster to do 2 char look ahead since only 2 tokens require a third scn = @scanner ctx = @lexing_context before = @scanner.pos # A look ahead of 3 characters is used since the longest operator ambiguity is resolved at that point. # PERFORMANCE NOTE: It is faster to peek once and use three separate variables for lookahead 0, 1 and 2. # la = scn.peek(3) return nil if la.empty? # Ruby 1.8.7 requires using offset and length (or integers are returned. # PERFORMANCE NOTE. # It is slightly faster to use these local variables than accessing la[0], la[1] etc. in ruby 1.9.3 # But not big enough to warrant two completely different implementations. # la0 = la[0,1] la1 = la[1,1] la2 = la[2,1] # PERFORMANCE NOTE: # A case when, where all the cases are literal values is the fastest way to map from data to code. # It is much faster than using a hash with lambdas, hash with symbol used to then invoke send etc. # This case statement is evaluated for most character positions in puppet source, and great care must # be taken to not introduce performance regressions. # case la0 when '.' emit(TOKEN_DOT, before) when ',' emit(TOKEN_COMMA, before) when '[' if (before == 0 || scn.string[locator.char_offset(before)-1,1] =~ /[[:blank:]\r\n]+/) emit(TOKEN_LISTSTART, before) else emit(TOKEN_LBRACK, before) end when ']' emit(TOKEN_RBRACK, before) when '(' emit(TOKEN_LPAREN, before) when ')' emit(TOKEN_RPAREN, before) when ';' emit(TOKEN_SEMIC, before) when '?' emit(TOKEN_QMARK, before) when '*' emit(TOKEN_TIMES, before) when '%' if la1 == '>' && ctx[:epp_mode] scn.pos += 2 if ctx[:epp_mode] == :expr enqueue_completed(TOKEN_EPPEND, before) end ctx[:epp_mode] = :text interpolate_epp else emit(TOKEN_MODULO, before) end when '{' # The lexer needs to help the parser since the technology used cannot deal with # lookahead of same token with different precedence. This is solved by making left brace # after ? into a separate token. # ctx[:brace_count] += 1 emit(if ctx[:after] == :QMARK TOKEN_SELBRACE else TOKEN_LBRACE end, before) when '}' ctx[:brace_count] -= 1 emit(TOKEN_RBRACE, before) # TOKENS @, @@, @( when '@' case la1 when '@' emit(TOKEN_ATAT, before) # TODO; Check if this is good for the grammar when '(' heredoc else emit(TOKEN_AT, before) end # TOKENS |, |>, |>> when '|' emit(case la1 when '>' la2 == '>' ? TOKEN_RRCOLLECT : TOKEN_RCOLLECT else TOKEN_PIPE end, before) # TOKENS =, =>, ==, =~ when '=' emit(case la1 when '=' TOKEN_ISEQUAL when '>' TOKEN_FARROW when '~' TOKEN_MATCH else TOKEN_EQUALS end, before) # TOKENS '+', '+=', and '+>' when '+' emit(case la1 when '=' TOKEN_APPENDS when '>' TOKEN_PARROW else TOKEN_PLUS end, before) # TOKENS '-', '->', and epp '-%>' (end of interpolation with trim) when '-' if ctx[:epp_mode] && la1 == '%' && la2 == '>' scn.pos += 3 if ctx[:epp_mode] == :expr enqueue_completed(TOKEN_EPPEND_TRIM, before) end interpolate_epp(:with_trim) else emit(case la1 when '>' TOKEN_IN_EDGE when '=' TOKEN_DELETES else TOKEN_MINUS end, before) end # TOKENS !, !=, !~ when '!' emit(case la1 when '=' TOKEN_NOTEQUAL when '~' TOKEN_NOMATCH else TOKEN_NOT end, before) # TOKENS ~>, ~ when '~' emit(la1 == '>' ? 
TOKEN_IN_EDGE_SUB : TOKEN_TILDE, before) when '#' scn.skip(PATTERN_COMMENT) nil # TOKENS '/', '/*' and '/ regexp /' when '/' case la1 when '*' scn.skip(PATTERN_MLCOMMENT) nil else # regexp position is a regexp, else a div if regexp_acceptable? && value = scn.scan(PATTERN_REGEX) # Ensure an escaped / was not matched while value[-2..-2] == STRING_BSLASH_BSLASH # i.e. \\ value += scn.scan_until(PATTERN_REGEX_END) end regex = value.sub(PATTERN_REGEX_A, '').sub(PATTERN_REGEX_Z, '').gsub(PATTERN_REGEX_ESC, '/') emit_completed([:REGEX, Regexp.new(regex), scn.pos-before], before) else emit(TOKEN_DIV, before) end end # TOKENS <, <=, <|, <<|, <<, <-, <~ when '<' emit(case la1 when '<' if la2 == '|' TOKEN_LLCOLLECT else TOKEN_LSHIFT end when '=' TOKEN_LESSEQUAL when '|' TOKEN_LCOLLECT when '-' TOKEN_OUT_EDGE when '~' TOKEN_OUT_EDGE_SUB else TOKEN_LESSTHAN end, before) # TOKENS >, >=, >> when '>' emit(case la1 when '>' TOKEN_RSHIFT when '=' TOKEN_GREATEREQUAL else TOKEN_GREATERTHAN end, before) # TOKENS :, ::CLASSREF, ::NAME when ':' if la1 == ':' before = scn.pos # PERFORMANCE NOTE: This could potentially be speeded up by using a case/when listing all # upper case letters. Alternatively, the 'A', and 'Z' comparisons may be faster if they are # frozen. # if la2 >= 'A' && la2 <= 'Z' # CLASSREF or error value = scn.scan(PATTERN_CLASSREF) if value after = scn.pos emit_completed([:CLASSREF, value.freeze, after-before], before) else # move to faulty position ('::' was ok) scn.pos = scn.pos + 3 - lex_error("Illegal fully qualified class reference") + lex_error(Puppet::Pops::Issues::ILLEGAL_FULLY_QUALIFIED_CLASS_REFERENCE) end else value = scn.scan(PATTERN_BARE_WORD) if value if value =~ PATTERN_NAME emit_completed([:NAME, value.freeze, scn.pos-before], before) else emit_completed([:WORD, value.freeze, scn.pos - before], before) end else # move to faulty position ('::' was ok) scn.pos = scn.pos + 2 - lex_error("Illegal fully qualified name") + lex_error(Puppet::Pops::Issues::ILLEGAL_FULLY_QUALIFIED_NAME) end end else emit(TOKEN_COLON, before) end when '$' if value = scn.scan(PATTERN_DOLLAR_VAR) emit_completed([:VARIABLE, value[1..-1].freeze, scn.pos - before], before) else # consume the $ and let higher layer complain about the error instead of getting a syntax error emit(TOKEN_VARIABLE_EMPTY, before) end when '"' # Recursive string interpolation, 'interpolate' either returns a STRING token, or # a DQPRE with the rest of the string's tokens placed in the @token_queue interpolate_dq when "'" emit_completed([:STRING, slurp_sqstring.freeze, scn.pos - before], before) when '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' value = scn.scan(PATTERN_NUMBER) if value length = scn.pos - before assert_numeric(value, length) emit_completed([:NUMBER, value.freeze, length], before) else # move to faulty position ([0-9] was ok) scn.pos = scn.pos + 1 - lex_error("Illegal number") + lex_error(Puppet::Pops::Issues::ILLEGAL_NUMBER) end when 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '_' value = scn.scan(PATTERN_BARE_WORD) if value && value =~ PATTERN_NAME emit_completed(KEYWORDS[value] || [:NAME, value.freeze, scn.pos - before], before) elsif value emit_completed([:WORD, value.freeze, scn.pos - before], before) else # move to faulty position ([a-z_] was ok) scn.pos = scn.pos + 1 fully_qualified = scn.match?(/::/) if fully_qualified - lex_error("Illegal fully qualified name") + lex_error(Puppet::Pops::Issues::ILLEGAL_FULLY_QUALIFIED_NAME) 
else - lex_error("Illegal name or bare word") + lex_error(Puppet::Pops::Issues::ILLEGAL_NAME_OR_BARE_WORD) end end when 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z' value = scn.scan(PATTERN_CLASSREF) if value emit_completed([:CLASSREF, value.freeze, scn.pos - before], before) else # move to faulty position ([A-Z] was ok) scn.pos = scn.pos + 1 - lex_error("Illegal class reference") + lex_error(Puppet::Pops::Issues::ILLEGAL_CLASS_REFERENCE) end when "\n" # If heredoc_cont is in effect there are heredoc text lines to skip over # otherwise just skip the newline. # if ctx[:newline_jump] scn.pos = ctx[:newline_jump] ctx[:newline_jump] = nil else scn.pos += 1 end return nil when ' ', "\t", "\r" scn.skip(PATTERN_WS) return nil else # In case of unicode spaces of various kinds that are captured by a regexp, but not by the # simpler case expression above (not worth handling those special cases with better performance). if scn.skip(PATTERN_WS) nil else # "unrecognized char" emit([:OTHER, la0, 1], before) end end end # Emits (produces) a token [:tokensymbol, TokenValue] and moves the scanner's position past the token # def emit(token, byte_offset) @scanner.pos = byte_offset + token[2] [token[0], TokenValue.new(token, byte_offset, @locator)] end # Emits the completed token on the form [:tokensymbol, TokenValue. This method does not alter # the scanner's position. # def emit_completed(token, byte_offset) [token[0], TokenValue.new(token, byte_offset, @locator)] end # Enqueues a completed token at the given offset def enqueue_completed(token, byte_offset) @token_queue << emit_completed(token, byte_offset) end # Allows subprocessors for heredoc etc to enqueue tokens that are tokenized by a different lexer instance # def enqueue(emitted_token) @token_queue << emitted_token end # Answers after which tokens it is acceptable to lex a regular expression. # PERFORMANCE NOTE: # It may be beneficial to turn this into a hash with default value of true for missing entries. # A case expression with literal values will however create a hash internally. Since a reference is # always needed to the hash, this access is almost as costly as a method call. # def regexp_acceptable? case @lexing_context[:after] # Ends of (potential) R-value generating expressions when :RPAREN, :RBRACK, :RRCOLLECT, :RCOLLECT false # End of (potential) R-value - but must be allowed because of case expressions # Called out here to not be mistaken for a bug. when :RBRACE true # Operands (that can be followed by DIV (even if illegal in grammar) when :NAME, :CLASSREF, :NUMBER, :STRING, :BOOLEAN, :DQPRE, :DQMID, :DQPOST, :HEREDOC, :REGEX, :VARIABLE, :WORD false else true end end end diff --git a/lib/puppet/pops/parser/lexer_support.rb b/lib/puppet/pops/parser/lexer_support.rb index d924ad2c9..af6079a4e 100644 --- a/lib/puppet/pops/parser/lexer_support.rb +++ b/lib/puppet/pops/parser/lexer_support.rb @@ -1,135 +1,139 @@ # This is an integral part of the Lexer. It is broken out into a separate module # for maintainability of the code, and making the various parts of the lexer focused. # module Puppet::Pops::Parser::LexerSupport # Returns "" if at end of input, else the following 5 characters with \n \r \t escaped def followed_by return "" if @scanner.eos? result = @scanner.rest[0,5] + "..." 
result.gsub!("\t", '\t') result.gsub!("\n", '\n') result.gsub!("\r", '\r') result end # Returns a quoted string using " or ' depending on the given a strings's content def format_quote(q) if q == "'" '"\'"' else "'#{q}'" end end # Raises a Puppet::LexError with the given message - def lex_error_without_pos msg - raise Puppet::ParseErrorWithIssue.new(msg, nil, nil, nil, nil, Puppet::Pops::Issues::LEX_ERROR.issue_code) + def lex_error_without_pos(issue, args = {}) + raise Puppet::ParseErrorWithIssue.new(issue.format(args), nil, nil, nil, nil, issue.issue_code) end - # Raises a Puppet::LexError with the given message - def lex_error(msg, pos=nil) - raise create_lex_error(msg, pos) + # Raises a Puppet::ParserErrorWithIssue with the given issue and arguments + def lex_error(issue, args = {}, pos=nil) + raise create_lex_error(issue, args, pos) end def filename file = @locator.file file.is_a?(String) && !file.empty? ? file : nil end def line(pos) @locator.line_for_offset(pos || @scanner.pos) end def position(pos) @locator.pos_on_line(pos || @scanner.pos) end - def lex_warning(msg, issue_code, pos=nil) + def lex_warning(issue, args = {}, pos=nil) Puppet::Util::Log.create({ :level => :warning, - :message => msg, - :issue_code => issue_code, + :message => issue.format(args), + :issue_code => issue.issue_code, :file => filename, :line => line(pos), :pos => position(pos), }) end - def create_lex_error(msg, pos = nil) + # @param issue [Puppet::Pops::Issues::Issue] the issue + # @param args [Hash] Issue arguments + # @param pos [Integer] + # @return [Puppet::ParseErrorWithIssue] the created error + def create_lex_error(issue, args = {}, pos = nil) Puppet::ParseErrorWithIssue.new( - msg, + issue.format(args), filename, line(pos), position(pos), nil, - Puppet::Pops::Issues::LEX_ERROR.issue_code) + issue.issue_code) end # Asserts that the given string value is a float, or an integer in decimal, octal or hex form. # An error is raised if the given value does not comply. # def assert_numeric(value, length) if value =~ /^0[xX].*$/ - lex_error("Not a valid hex number #{value}", length) unless value =~ /^0[xX][0-9A-Fa-f]+$/ + lex_error(Puppet::Pops::Issues::INVALID_HEX_NUMBER, {:value => value}, length) unless value =~ /^0[xX][0-9A-Fa-f]+$/ elsif value =~ /^0[^.].*$/ - lex_error("Not a valid octal number #{value}", length) unless value =~ /^0[0-7]+$/ + lex_error(Puppet::Pops::Issues::INVALID_OCTAL_NUMBER, {:value => value}, length) unless value =~ /^0[0-7]+$/ else - lex_error("Not a valid decimal number #{value}", length) unless value =~ /0?\d+(?:\.\d+)?(?:[eE]-?\d+)?/ + lex_error(Puppet::Pops::Issues::INVALID_DECIMAL_NUMBER, {:value => value}, length) unless value =~ /0?\d+(?:\.\d+)?(?:[eE]-?\d+)?/ end end # A TokenValue keeps track of the token symbol, the lexed text for the token, its length # and its position in its source container. There is a cost associated with computing the # line and position on line information. 
  # A TokenValue keeps track of the token symbol, the lexed text for the token, its length,
  # and its position in its source container. There is a cost associated with computing the
  # line and position on line information.
  #
  class TokenValue < Puppet::Pops::Parser::Locatable
    attr_reader :token_array
    attr_reader :offset
    attr_reader :locator

    def initialize(token_array, offset, locator)
      @token_array = token_array
      @offset = offset
      @locator = locator
    end

    def length
      @token_array[2]
    end

    def [](key)
      case key
      when :value
        @token_array[1]
      when :file
        @locator.file
      when :line
        @locator.line_for_offset(@offset)
      when :pos
        @locator.pos_on_line(@offset)
      when :length
        @token_array[2]
      when :locator
        @locator
      when :offset
        @offset
      else
        nil
      end
    end

    def to_s
      # This format is very compact and is intended for debugging output from the racc parser in
      # debug mode. If this is made more elaborate the output from a debug run becomes very hard to read.
      #
      "'#{self[:value]} #{@token_array[0]}'"
    end

    # TODO: Make this comparable for testing
    # (vs symbolic, vs array with symbol and non hash, vs array with symbol and hash)
    #
  end
end
diff --git a/lib/puppet/pops/parser/slurp_support.rb b/lib/puppet/pops/parser/slurp_support.rb
index ceaf8d333..d4a12e8f4 100644
--- a/lib/puppet/pops/parser/slurp_support.rb
+++ b/lib/puppet/pops/parser/slurp_support.rb
@@ -1,100 +1,97 @@
# This module is an integral part of the Lexer.
# It defines the string slurping behavior - finding the string and non string parts in interpolated
# strings, and translating escape sequences in strings to their single character equivalents.
#
# PERFORMANCE NOTE: The various kinds of slurping could be made even more generic, but this requires
# additional parameter passing and evaluation of conditional logic.
# TODO: More detailed performance analysis of excessive character escaping and interpolation.
#
module Puppet::Pops::Parser::SlurpSupport
  include Puppet::Pops::Parser::LexerSupport

  SLURP_SQ_PATTERN = /(?:[^\\]|^|[^\\])(?:[\\]{2})*[']/
  SLURP_DQ_PATTERN = /(?:[^\\]|^|[^\\])(?:[\\]{2})*(["]|[$]\{?)/
  SLURP_UQ_PATTERN = /(?:[^\\]|^|[^\\])(?:[\\]{2})*([$]\{?|\z)/
  SLURP_ALL_PATTERN = /.*(\z)/m
  SQ_ESCAPES = %w{ \\ ' }
  DQ_ESCAPES = %w{ \\ $ ' " r n t s u}+["\r\n", "\n"]
  UQ_ESCAPES = %w{ \\ $ r n t s u}+["\r\n", "\n"]

  def slurp_sqstring
    # skip the leading '
    @scanner.pos += 1
-    str = slurp(@scanner, SLURP_SQ_PATTERN, SQ_ESCAPES, :ignore_invalid_escapes) || lex_error("Unclosed quote after \"'\" followed by '#{followed_by}'")
+    str = slurp(@scanner, SLURP_SQ_PATTERN, SQ_ESCAPES, :ignore_invalid_escapes)
+    lex_error(Puppet::Pops::Issues::UNCLOSED_QUOTE, :after => "\"'\"", :followed_by => followed_by) unless str
    str[0..-2] # strip closing "'" from result
  end

  def slurp_dqstring
    scn = @scanner
    last = scn.matched
    str = slurp(scn, SLURP_DQ_PATTERN, DQ_ESCAPES, false)
    unless str
-      lex_error("Unclosed quote after #{format_quote(last)} followed by '#{followed_by}'")
+      lex_error(Puppet::Pops::Issues::UNCLOSED_QUOTE, :after => format_quote(last), :followed_by => followed_by)
    end

    # The terminator may be a single char '"' or '$', or the two characters '${'; group match 1
    # (scn[1]) from the last slurp holds this terminator.
    terminator = scn[1]
    [str[0..(-1 - terminator.length)], terminator]
  end
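  # A small sketch, in plain Ruby, of the terminator arithmetic used above; the slurped string
  # includes the terminator, which is sliced off the end and returned separately:
  #
  # @example
  #   str        = 'hello${'              # as returned by slurp (terminator included)
  #   terminator = '${'                   # group 1 of the last scanner match
  #   str[0..(-1 - terminator.length)]    # => "hello"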
  # Copy from old lexer - can do much better
  def slurp_uqstring
    scn = @scanner
    last = scn.matched
    ignore = true
    str = slurp(scn, @lexing_context[:uq_slurp_pattern], @lexing_context[:escapes], :ignore_invalid_escapes)

    # The terminator may be a single char '$', the two characters '${', or the empty string ''
    # at the end of input. Group match 1 holds this.
    # The exceptional case is found by looking at subgroup 1 of the most recent match made by the
    # scanner (i.e. @scanner[1]). This is the last match made by the slurp method (having called
    # scan_until on the scanner). If there is a terminating character it must be stripped and
    # returned separately.
    #
    terminator = scn[1]
    [str[0..(-1 - terminator.length)], terminator]
  end

  # Slurps a string from the given scanner until the given pattern, and then replaces any escaped
  # characters given by escapes with their control-character equivalent or, in case of line breaks,
  # replaces the pattern \r?\n with an empty string.
  # The returned string contains the terminating character. Returns nil if the scanner cannot scan
  # until the given pattern.
  #
  def slurp(scanner, pattern, escapes, ignore_invalid_escapes)
    str = scanner.scan_until(pattern) || return

    # Process unicode escapes first, as they require getting 4 hex digits.
    # If a \u is found after this pass, it is warned about not being a unicode escape.
    if escapes.include?('u')
      str.gsub!(/\\u([\da-fA-F]{4})/m) { [$1.hex].pack("U") }
    end

    str.gsub!(/\\([^\r\n]|(?:\r?\n))/m) {
      ch = $1
      if escapes.include? ch
        case ch
        when 'r'   ; "\r"
        when 'n'   ; "\n"
        when 't'   ; "\t"
        when 's'   ; " "
        when 'u'
-          issue = Puppet::Pops::Issues::ILLEGAL_UNICODE_ESCAPE
-          lex_warning(issue.format, issue.issue_code)
+          lex_warning(Puppet::Pops::Issues::ILLEGAL_UNICODE_ESCAPE)
          "\\u"
        when "\n"  ; ''
        when "\r\n"; ''
        else ch
        end
      else
-        unless ignore_invalid_escapes
-          issue = Puppet::Pops::Issues::UNRECOGNIZED_ESCAPE
-          lex_warning(issue.format(:ch => ch), issue.issue_code)
-        end
+        lex_warning(Puppet::Pops::Issues::UNRECOGNIZED_ESCAPE, :ch => ch) unless ignore_invalid_escapes
        "\\#{ch}"
      end
    }
    str
  end
end
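# A hedged sketch, runnable in plain Ruby, of the two escape translating passes slurp performs;
# the input strings are illustrative:
#
# @example Unicode escapes are expanded first
#   "\\u0041".gsub(/\\u([\da-fA-F]{4})/m) { [$1.hex].pack("U") }  # => "A"
#
# @example Recognized escapes become control characters; unrecognized ones are kept verbatim
#   "a\\tb".gsub(/\\([^\r\n]|(?:\r?\n))/m) { $1 == 't' ? "\t" : "\\#{$1}" }  # => "a\tb"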