diff --git a/lib/puppet/parser/lexer.rb b/lib/puppet/parser/lexer.rb
index d6527cf79..de1249991 100644
--- a/lib/puppet/parser/lexer.rb
+++ b/lib/puppet/parser/lexer.rb
@@ -1,618 +1,608 @@
# the scanner/lexer

require 'forwardable'
require 'strscan'
require 'puppet'
require 'puppet/util/methodhelper'

module Puppet
  class LexError < RuntimeError; end
end

module Puppet::Parser; end

class Puppet::Parser::Lexer
  extend Forwardable

  attr_reader :last, :file, :lexing_context, :token_queue

  attr_accessor :line, :indefine
  alias :indefine? :indefine

  # Returns the position on the line.
  # This implementation always returns nil. It is here for API reasons in Puppet::Error
  # which needs to support both --parser current, and --parser future.
  #
  def pos
    # Make the lexer comply with newer API. It does not produce a pos...
    nil
  end

  def lex_error msg
    raise Puppet::LexError.new(msg)
  end

  class Token
    ALWAYS_ACCEPTABLE = Proc.new { |context| true }

    include Puppet::Util::MethodHelper

    attr_accessor :regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate
    alias skip? skip
    alias accumulate? accumulate

    def initialize(string_or_regex, name, options = {})
      if string_or_regex.is_a?(String)
        @name, @string = name, string_or_regex
        @regex = Regexp.new(Regexp.escape(string_or_regex))
      else
        @name, @regex = name, string_or_regex
      end
      set_options(options)
      @acceptable_when = ALWAYS_ACCEPTABLE
    end

    def to_s
      string or @name.to_s
    end

    def acceptable?(context={})
      @acceptable_when.call(context)
    end

    # Define when the token is able to match.
    # This provides context that cannot be expressed otherwise, such as feature flags.
    #
    # @param block [Proc] a proc that given a context returns a boolean
    def acceptable_when(block)
      @acceptable_when = block
    end
  end

  # Maintain a list of tokens.
  class TokenList
    extend Forwardable

    attr_reader :regex_tokens, :string_tokens
    def_delegator :@tokens, :[]

    # Create a new token.
    def add_token(name, regex, options = {}, &block)
      raise(ArgumentError, "Token #{name} already exists") if @tokens.include?(name)
      token = Token.new(regex, name, options)
      @tokens[token.name] = token
      if token.string
        @string_tokens << token
        @tokens_by_string[token.string] = token
      else
        @regex_tokens << token
      end

      token.meta_def(:convert, &block) if block_given?

      token
    end

    def initialize
      @tokens = {}
      @regex_tokens = []
      @string_tokens = []
      @tokens_by_string = {}
    end

    # Look up a token by its value, rather than name.
    def lookup(string)
      @tokens_by_string[string]
    end

    # Define more tokens.
    def add_tokens(hash)
      hash.each do |regex, name|
        add_token(name, regex)
      end
    end

    # Sort our tokens by length, so we know once we match, we're done.
    # This helps us avoid the O(n^2) nature of token matching.
    def sort_tokens
      @string_tokens.sort! { |a, b| b.string.length <=> a.string.length }
    end

    # Yield each token name and value in turn.
    def each
      @tokens.each {|name, value| yield name, value }
    end
  end

  TOKENS = TokenList.new
  TOKENS.add_tokens(
    '[' => :LBRACK,
    ']' => :RBRACK,
    '{' => :LBRACE,
    '}' => :RBRACE,
    '(' => :LPAREN,
    ')' => :RPAREN,
    '=' => :EQUALS,
    '+=' => :APPENDS,
    '==' => :ISEQUAL,
    '>=' => :GREATEREQUAL,
    '>' => :GREATERTHAN,
    '<' => :LESSTHAN,
    '<=' => :LESSEQUAL,
    '!=' => :NOTEQUAL,
    '!' => :NOT,
    ',' => :COMMA,
    '.' => :DOT,
    ':' => :COLON,
    '@' => :AT,
    '<<|' => :LLCOLLECT,
    '|>>' => :RRCOLLECT,
    '->' => :IN_EDGE,
    '<-' => :OUT_EDGE,
    '~>' => :IN_EDGE_SUB,
    '<~' => :OUT_EDGE_SUB,
    '<|' => :LCOLLECT,
    '|>' => :RCOLLECT,
    ';' => :SEMIC,
    '?' => :QMARK,
    '\\' => :BACKSLASH,
    '=>' => :FARROW,
    '+>' => :PARROW,
    '+' => :PLUS,
    '-' => :MINUS,
    '/' => :DIV,
    '*' => :TIMES,
    '%' => :MODULO,
    '<<' => :LSHIFT,
    '>>' => :RSHIFT,
    '=~' => :MATCH,
    '!~' => :NOMATCH,
    %r{((::){0,1}[A-Z][-\w]*)+} => :CLASSREF,
    "" => :STRING,
    "" => :DQPRE,
    "" => :DQMID,
    "" => :DQPOST,
    "" => :BOOLEAN
  )

  module Contextual
    QUOTE_TOKENS = [:DQPRE,:DQMID]
    REGEX_INTRODUCING_TOKENS = [:NODE,:LBRACE,:RBRACE,:MATCH,:NOMATCH,:COMMA]

    NOT_INSIDE_QUOTES = Proc.new do |context|
      !QUOTE_TOKENS.include? context[:after]
    end

    INSIDE_QUOTES = Proc.new do |context|
      QUOTE_TOKENS.include? context[:after]
    end

    IN_REGEX_POSITION = Proc.new do |context|
      REGEX_INTRODUCING_TOKENS.include? context[:after]
    end

    IN_STRING_INTERPOLATION = Proc.new do |context|
      context[:string_interpolation_depth] > 0
    end

    DASHED_VARIABLES_ALLOWED = Proc.new do |context|
      Puppet[:allow_variables_with_dashes]
    end

    VARIABLE_AND_DASHES_ALLOWED = Proc.new do |context|
      Contextual::DASHED_VARIABLES_ALLOWED.call(context) and TOKENS[:VARIABLE].acceptable?(context)
    end
  end

  # Numbers are treated separately from names, so that they may contain dots.
  TOKENS.add_token :NUMBER, %r{\b(?:0[xX][0-9A-Fa-f]+|0?\d+(?:\.\d+)?(?:[eE]-?\d+)?)\b} do |lexer, value|
    [TOKENS[:NAME], value]
  end
  TOKENS[:NUMBER].acceptable_when Contextual::NOT_INSIDE_QUOTES

  TOKENS.add_token :NAME, %r{((::)?[a-z0-9][-\w]*)(::[a-z0-9][-\w]*)*} do |lexer, value|
    string_token = self
    # we're looking for keywords here
    if tmp = KEYWORDS.lookup(value)
      string_token = tmp
      if [:TRUE, :FALSE].include?(string_token.name)
        value = eval(value)
        string_token = TOKENS[:BOOLEAN]
      end
-    else
-      lexer.warn_if_reserved(value)
    end
    [string_token, value]
  end
  [:NAME, :CLASSREF].each do |name_token|
    TOKENS[name_token].acceptable_when Contextual::NOT_INSIDE_QUOTES
  end

  TOKENS.add_token :COMMENT, %r{#.*}, :accumulate => true, :skip => true do |lexer,value|
    value.sub!(/# ?/,'')
    [self, value]
  end

  TOKENS.add_token :MLCOMMENT, %r{/\*(.*?)\*/}m, :accumulate => true, :skip => true do |lexer, value|
    lexer.line += value.count("\n")
    value.sub!(/^\/\* ?/,'')
    value.sub!(/ ?\*\/$/,'')
    [self,value]
  end

  TOKENS.add_token :REGEX, %r{/[^/\n]*/} do |lexer, value|
    # Make sure we haven't matched an escaped /
    while value[-2..-2] == '\\'
      other = lexer.scan_until(%r{/})
      value += other
    end
    regex = value.sub(%r{\A/}, "").sub(%r{/\Z}, '').gsub("\\/", "/")
    [self, Regexp.new(regex)]
  end
  TOKENS[:REGEX].acceptable_when Contextual::IN_REGEX_POSITION

  TOKENS.add_token :RETURN, "\n", :skip => true, :incr_line => true, :skip_text => true

  TOKENS.add_token :SQUOTE, "'" do |lexer, value|
    [TOKENS[:STRING], lexer.slurpstring(value,["'"],:ignore_invalid_escapes).first ]
  end

  DQ_initial_token_types = {'$' => :DQPRE,'"' => :STRING}
  DQ_continuation_token_types = {'$' => :DQMID,'"' => :DQPOST}

  TOKENS.add_token :DQUOTE, /"/ do |lexer, value|
    lexer.tokenize_interpolated_string(DQ_initial_token_types)
  end

  TOKENS.add_token :DQCONT, /\}/ do |lexer, value|
    lexer.tokenize_interpolated_string(DQ_continuation_token_types)
  end
  TOKENS[:DQCONT].acceptable_when Contextual::IN_STRING_INTERPOLATION

  TOKENS.add_token :DOLLAR_VAR_WITH_DASH, %r{\$(?:::)?(?:[-\w]+::)*[-\w]+} do |lexer, value|
    lexer.warn_if_variable_has_hyphen(value)
    [TOKENS[:VARIABLE], value[1..-1]]
  end
  TOKENS[:DOLLAR_VAR_WITH_DASH].acceptable_when Contextual::DASHED_VARIABLES_ALLOWED

  TOKENS.add_token :DOLLAR_VAR, %r{\$(::)?(\w+::)*\w+} do |lexer, value|
    [TOKENS[:VARIABLE],value[1..-1]]
  end

  TOKENS.add_token :VARIABLE_WITH_DASH, %r{(?:::)?(?:[-\w]+::)*[-\w]+} do |lexer, value|
    lexer.warn_if_variable_has_hyphen(value)
    [TOKENS[:VARIABLE], value]
  end
  TOKENS[:VARIABLE_WITH_DASH].acceptable_when Contextual::VARIABLE_AND_DASHES_ALLOWED

  TOKENS.add_token :VARIABLE, %r{(::)?(\w+::)*\w+}
  TOKENS[:VARIABLE].acceptable_when Contextual::INSIDE_QUOTES

  TOKENS.sort_tokens

  @@pairs = {
    "{" => "}",
    "(" => ")",
    "[" => "]",
    "<|" => "|>",
    "<<|" => "|>>"
  }

  KEYWORDS = TokenList.new
  KEYWORDS.add_tokens(
    "case" => :CASE,
    "class" => :CLASS,
    "default" => :DEFAULT,
    "define" => :DEFINE,
    "import" => :IMPORT,
    "if" => :IF,
    "elsif" => :ELSIF,
    "else" => :ELSE,
    "inherits" => :INHERITS,
    "node" => :NODE,
    "and" => :AND,
    "or" => :OR,
    "undef" => :UNDEF,
    "false" => :FALSE,
    "true" => :TRUE,
    "in" => :IN,
    "unless" => :UNLESS
  )

  def clear
    initvars
  end

  def expected
    return nil if @expected.empty?
    name = @expected[-1]
    TOKENS.lookup(name) or lex_error "Could not find expected token #{name}"
  end

  # scan the whole file
  # basically just used for testing
  def fullscan
    array = []

    self.scan { |token, str|
      # Ignore any definition nesting problems
      @indefine = false
      array.push([token,str])
    }
    array
  end

  def file=(file)
    @file = file
    @line = 1
    contents = Puppet::FileSystem.exist?(file) ? Puppet::FileSystem.read(file) : ""
    @scanner = StringScanner.new(contents)
  end

  def_delegator :@token_queue, :shift, :shift_token

  def find_string_token
    # We know our longest string token is three chars, so try each size in turn
    # until we either match or run out of chars. This way our worst-case is three
    # tries, where it is otherwise the number of string tokens we have. Also,
    # the lookups are optimized hash lookups, instead of regex scans.
    #
    s = @scanner.peek(3)
    token = TOKENS.lookup(s[0,3]) || TOKENS.lookup(s[0,2]) || TOKENS.lookup(s[0,1])
    [ token, token && @scanner.scan(token.regex) ]
  end

  # Find the next token that matches a regex. We look for these first.
  def find_regex_token
    best_token = nil
    best_length = 0

    # I tried optimizing based on the first char, but it had
    # a slightly negative effect and was a good bit more complicated.
    TOKENS.regex_tokens.each do |token|
      if length = @scanner.match?(token.regex) and token.acceptable?(lexing_context)
        # We've found a longer match
        if length > best_length
          best_length = length
          best_token = token
        end
      end
    end

    return best_token, @scanner.scan(best_token.regex) if best_token
  end

  # Find the next token, returning the string and the token.
  def find_token
    shift_token || find_regex_token || find_string_token
  end

  def initialize
    initvars
  end

  def initvars
    @line = 1
    @previous_token = nil
    @scanner = nil
    @file = nil
    # AAARRGGGG! okay, regexes in ruby are bloody annoying
    # no one else has "\n" =~ /\s/
    @skip = %r{[ \t\r]+}

    @namestack = []
    @token_queue = []
    @indefine = false
    @expected = []
    @commentstack = [ ['', @line] ]
    @lexing_context = {
      :after => nil,
      :start_of_line => true,
      :string_interpolation_depth => 0
    }
  end

  # Make any necessary changes to the token and/or value.
  def munge_token(token, value)
    @line += 1 if token.incr_line

    skip if token.skip_text

    return if token.skip and not token.accumulate?

    token, value = token.convert(self, value) if token.respond_to?(:convert)

    return unless token

    if token.accumulate?
      comment = @commentstack.pop
      comment[0] << value + "\n"
      @commentstack.push(comment)
    end

    return if token.skip

    return token, { :value => value, :line => @line }
  end

  # Handling the namespace stack
  def_delegator :@namestack, :pop, :namepop

  # This value might have :: in it, but we don't care -- it'll be handled
  # normally when joining, and when popping we want to pop this full value,
  # however long the namespace is.
  def_delegator :@namestack, :<<, :namestack

  # Collect the current namespace.
  def namespace
    @namestack.join("::")
  end

  def_delegator :@scanner, :rest

  # this is the heart of the lexer
  def scan
    #Puppet.debug("entering scan")
    lex_error "Invalid or empty string" unless @scanner

    # Skip any initial whitespace.
    skip

    until token_queue.empty? and @scanner.eos? do
      matched_token, value = find_token

      # error out if we didn't match anything at all
      lex_error "Could not match #{@scanner.rest[/^(\S+|\s+|.*)/]}" unless matched_token

      newline = matched_token.name == :RETURN

      # this matches a blank line; eat the previously accumulated comments
      getcomment if lexing_context[:start_of_line] and newline
      lexing_context[:start_of_line] = newline

      final_token, token_value = munge_token(matched_token, value)

      unless final_token
        skip
        next
      end

      final_token_name = final_token.name
      lexing_context[:after] = final_token_name unless newline
      lexing_context[:string_interpolation_depth] += 1 if final_token_name == :DQPRE
      lexing_context[:string_interpolation_depth] -= 1 if final_token_name == :DQPOST

      value = token_value[:value]

      if match = @@pairs[value] and final_token_name != :DQUOTE and final_token_name != :SQUOTE
        @expected << match
      elsif exp = @expected[-1] and exp == value and final_token_name != :DQUOTE and final_token_name != :SQUOTE
        @expected.pop
      end

      if final_token_name == :LBRACE or final_token_name == :LPAREN
        commentpush
      end
      if final_token_name == :RPAREN
        commentpop
      end

      yield [final_token_name, token_value]

      if @previous_token
        namestack(value) if @previous_token.name == :CLASS and value != '{'

        if @previous_token.name == :DEFINE
          if indefine?
            msg = "Cannot nest definition #{value} inside #{@indefine}"
            self.indefine = false
            raise Puppet::ParseError, msg
          end

          @indefine = value
        end
      end
      @previous_token = final_token
      skip
    end
    @scanner = nil

    # This indicates that we're done parsing.
    yield [false,false]
  end

  # Skip any skipchars in our remaining string.
  def skip
    @scanner.skip(@skip)
  end

  # Provide some limited access to the scanner, for those
  # tokens that need it.
  def_delegator :@scanner, :scan_until

  # we've encountered the start of a string...
  # slurp in the rest of the string and return it
  def slurpstring(terminators,escapes=%w{ \\ $ ' " r n t s }+["\n"],ignore_invalid_escapes=false)
    # we search for the next quote that isn't preceded by a
    # backslash; the caret is there to match empty strings
    str = @scanner.scan_until(/([^\\]|^|[^\\])([\\]{2})*[#{terminators}]/) or lex_error "Unclosed quote after '#{last}' in '#{rest}'"
    @line += str.count("\n") # literal carriage returns add to the line count.
    str.gsub!(/\\(.)/m) {
      ch = $1
      if escapes.include? ch
        case ch
        when 'r'; "\r"
        when 'n'; "\n"
        when 't'; "\t"
        when 's'; " "
        when "\n"; ''
        else ch
        end
      else
        Puppet.warning "Unrecognised escape sequence '\\#{ch}'#{file && " in file #{file}"}#{line && " at line #{line}"}" unless ignore_invalid_escapes
        "\\#{ch}"
      end
    }
    [ str[0..-2],str[-1,1] ]
  end

  def tokenize_interpolated_string(token_type,preamble='')
    value,terminator = slurpstring('"$')
    token_queue << [TOKENS[token_type[terminator]],preamble+value]
    variable_regex = if Puppet[:allow_variables_with_dashes]
      TOKENS[:VARIABLE_WITH_DASH].regex
    else
      TOKENS[:VARIABLE].regex
    end
    if terminator != '$' or @scanner.scan(/\{/)
      token_queue.shift
    elsif var_name = @scanner.scan(variable_regex)
      warn_if_variable_has_hyphen(var_name)
      token_queue << [TOKENS[:VARIABLE],var_name]
      tokenize_interpolated_string(DQ_continuation_token_types)
    else
      tokenize_interpolated_string(token_type,token_queue.pop.last + terminator)
    end
  end

  # just parse a string, not a whole file
  def string=(string)
    @scanner = StringScanner.new(string)
  end

  # returns the content of the currently accumulated content cache
  def commentpop
    @commentstack.pop[0]
  end

  def getcomment(line = nil)
    comment = @commentstack.last
    if line.nil? or comment[1] <= line
      @commentstack.pop
      @commentstack.push(['', @line])
      return comment[0]
    end
    ''
  end

  def commentpush
    @commentstack.push(['', @line])
  end

  def warn_if_variable_has_hyphen(var_name)
    if var_name.include?('-')
      Puppet.deprecation_warning("Using `-` in variable names is deprecated at #{file || ''}:#{line}. See http://links.puppetlabs.com/puppet-hyphenated-variable-deprecation")
    end
  end
-
-  def warn_if_reserved(name)
-    case name
-    when 'private', 'function', 'attr', 'type'
-      msg = "Future reserved word: #{name}. Either choose a different name or quote the string, if possible. In #{file || ''}:#{line}. See http://links.puppetlabs.com/reserved-words-4"
-      Puppet.deprecation_warning(msg, name)
-    end
-  end
end
diff --git a/spec/unit/parser/lexer_spec.rb b/spec/unit/parser/lexer_spec.rb
index 434227ce6..f0f10e9f3 100755
--- a/spec/unit/parser/lexer_spec.rb
+++ b/spec/unit/parser/lexer_spec.rb
@@ -1,879 +1,877 @@
#! /usr/bin/env ruby
require 'spec_helper'

require 'puppet/parser/lexer'

# This is a special matcher to easily match lexer output
RSpec::Matchers.define :be_like do |*expected|
  match do |actual|
    expected.zip(actual).all? { |e,a| !e or a[0] == e or (e.is_a? Array and a[0] == e[0] and (a[1] == e[1] or (a[1].is_a?(Hash) and a[1][:value] == e[1]))) }
  end
end
__ = nil

def tokens_scanned_from(s)
  lexer = Puppet::Parser::Lexer.new
  lexer.string = s
  lexer.fullscan[0..-2]
end

describe Puppet::Parser::Lexer do
  describe "when reading strings" do
    before { @lexer = Puppet::Parser::Lexer.new }

    it "should increment the line count for every carriage return in the string" do
      @lexer.line = 10
      @lexer.string = "this\nis\natest'"
      @lexer.slurpstring("'")

      @lexer.line.should == 12
    end

    it "should not increment the line count for escapes in the string" do
      @lexer.line = 10
      @lexer.string = "this\\nis\\natest'"
      @lexer.slurpstring("'")

      @lexer.line.should == 10
    end

    it "should not think the terminator is escaped, when preceded by an even number of backslashes" do
      @lexer.line = 10
      @lexer.string = "here\nis\nthe\nstring\\\\'with\nextra\njunk"
      @lexer.slurpstring("'")

      @lexer.line.should == 13
    end

    {
      'r' => "\r",
      'n' => "\n",
      't' => "\t",
      's' => " "
    }.each do |esc, expected_result|
      it "should recognize \\#{esc} sequence" do
        @lexer.string = "\\#{esc}'"
        @lexer.slurpstring("'")[0].should == expected_result
      end
    end
  end
end

describe Puppet::Parser::Lexer::Token do
  before do
    @token = Puppet::Parser::Lexer::Token.new(%r{something}, :NAME)
  end

  [:regex, :name, :string, :skip, :incr_line, :skip_text, :accumulate].each do |param|
    it "should have a #{param.to_s} reader" do
      @token.should be_respond_to(param)
    end

    it "should have a #{param.to_s} writer" do
      @token.should be_respond_to(param.to_s + "=")
    end
  end
end

describe Puppet::Parser::Lexer::Token, "when initializing" do
  it "should create a regex if the first argument is a string" do
    Puppet::Parser::Lexer::Token.new("something", :NAME).regex.should == %r{something}
  end

  it "should set the string if the first argument is one" do
    Puppet::Parser::Lexer::Token.new("something", :NAME).string.should == "something"
  end

  it "should set the regex if the first argument is one" do
    Puppet::Parser::Lexer::Token.new(%r{something}, :NAME).regex.should == %r{something}
  end
end

describe Puppet::Parser::Lexer::TokenList do
  before do
    @list = Puppet::Parser::Lexer::TokenList.new
  end

  it "should have a method for retrieving tokens by the name" do
    token = @list.add_token :name, "whatever"
    @list[:name].should equal(token)
  end

  it "should have a method for retrieving string tokens by the string" do
    token = @list.add_token :name, "whatever"
    @list.lookup("whatever").should equal(token)
  end

  it "should add tokens to the list when directed" do
    token = @list.add_token :name, "whatever"
    @list[:name].should equal(token)
  end

  it "should have a method for adding multiple tokens at once" do
    @list.add_tokens "whatever" => :name, "foo" => :bar
    @list[:name].should_not be_nil
    @list[:bar].should_not be_nil
  end

  it "should fail to add tokens sharing a name with an existing token" do
    @list.add_token :name, "whatever"
    expect { @list.add_token :name, "whatever" }.to raise_error(ArgumentError)
  end

  it "should set provided options on tokens being added" do
    token = @list.add_token :name, "whatever", :skip_text => true
    token.skip_text.should == true
  end

  it "should define any provided blocks as a :convert method" do
    token = @list.add_token(:name, "whatever") do
      "foo"
    end

    token.convert.should == "foo"
  end

  it "should store all string tokens in the :string_tokens list" do
    one = @list.add_token(:name, "1")
    @list.string_tokens.should be_include(one)
  end

  it "should store all regex tokens in the :regex_tokens list" do
    one = @list.add_token(:name, %r{one})
    @list.regex_tokens.should be_include(one)
  end
"should not store string tokens in the :regex_tokens list" do one = @list.add_token(:name, "1") @list.regex_tokens.should_not be_include(one) end it "should not store regex tokens in the :string_tokens list" do one = @list.add_token(:name, %r{one}) @list.string_tokens.should_not be_include(one) end it "should sort the string tokens inversely by length when asked" do one = @list.add_token(:name, "1") two = @list.add_token(:other, "12") @list.sort_tokens @list.string_tokens.should == [two, one] end end describe Puppet::Parser::Lexer::TOKENS do before do @lexer = Puppet::Parser::Lexer.new end { :LBRACK => '[', :RBRACK => ']', :LBRACE => '{', :RBRACE => '}', :LPAREN => '(', :RPAREN => ')', :EQUALS => '=', :ISEQUAL => '==', :GREATEREQUAL => '>=', :GREATERTHAN => '>', :LESSTHAN => '<', :LESSEQUAL => '<=', :NOTEQUAL => '!=', :NOT => '!', :COMMA => ',', :DOT => '.', :COLON => ':', :AT => '@', :LLCOLLECT => '<<|', :RRCOLLECT => '|>>', :LCOLLECT => '<|', :RCOLLECT => '|>', :SEMIC => ';', :QMARK => '?', :BACKSLASH => '\\', :FARROW => '=>', :PARROW => '+>', :APPENDS => '+=', :PLUS => '+', :MINUS => '-', :DIV => '/', :TIMES => '*', :LSHIFT => '<<', :RSHIFT => '>>', :MATCH => '=~', :NOMATCH => '!~', :IN_EDGE => '->', :OUT_EDGE => '<-', :IN_EDGE_SUB => '~>', :OUT_EDGE_SUB => '<~', }.each do |name, string| it "should have a token named #{name.to_s}" do Puppet::Parser::Lexer::TOKENS[name].should_not be_nil end it "should match '#{string}' for the token #{name.to_s}" do Puppet::Parser::Lexer::TOKENS[name].string.should == string end end { "case" => :CASE, "class" => :CLASS, "default" => :DEFAULT, "define" => :DEFINE, "import" => :IMPORT, "if" => :IF, "elsif" => :ELSIF, "else" => :ELSE, "inherits" => :INHERITS, "node" => :NODE, "and" => :AND, "or" => :OR, "undef" => :UNDEF, "false" => :FALSE, "true" => :TRUE, "in" => :IN, "unless" => :UNLESS, }.each do |string, name| it "should have a keyword named #{name.to_s}" do Puppet::Parser::Lexer::KEYWORDS[name].should_not be_nil end it "should have the keyword for #{name.to_s} set to #{string}" do Puppet::Parser::Lexer::KEYWORDS[name].string.should == string end end # These tokens' strings don't matter, just that the tokens exist. 
  [:STRING, :DQPRE, :DQMID, :DQPOST, :BOOLEAN, :NAME, :NUMBER, :COMMENT, :MLCOMMENT, :RETURN, :SQUOTE, :DQUOTE, :VARIABLE].each do |name|
    it "should have a token named #{name.to_s}" do
      Puppet::Parser::Lexer::TOKENS[name].should_not be_nil
    end
  end
end

describe Puppet::Parser::Lexer::TOKENS[:CLASSREF] do
  before { @token = Puppet::Parser::Lexer::TOKENS[:CLASSREF] }

  it "should match against single upper-case alpha-numeric terms" do
    @token.regex.should =~ "One"
  end

  it "should match against upper-case alpha-numeric terms separated by double colons" do
    @token.regex.should =~ "One::Two"
  end

  it "should match against many upper-case alpha-numeric terms separated by double colons" do
    @token.regex.should =~ "One::Two::Three::Four::Five"
  end

  it "should match against upper-case alpha-numeric terms prefixed by double colons" do
    @token.regex.should =~ "::One"
  end
end

describe Puppet::Parser::Lexer::TOKENS[:NAME] do
  before { @token = Puppet::Parser::Lexer::TOKENS[:NAME] }

  it "should match against lower-case alpha-numeric terms" do
    @token.regex.should =~ "one-two"
  end

  it "should return itself and the value if the matched term is not a keyword" do
    Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(nil)
    lexer = stub("lexer")
-    lexer.expects(:warn_if_reserved).returns(nil)
    @token.convert(lexer, "myval").should == [Puppet::Parser::Lexer::TOKENS[:NAME], "myval"]
  end

  it "should return the keyword token and the value if the matched term is a keyword" do
    keyword = stub 'keyword', :name => :testing
    Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(keyword)
    @token.convert(stub("lexer"), "myval").should == [keyword, "myval"]
  end

  it "should return the BOOLEAN token and 'true' if the matched term is the string 'true'" do
    keyword = stub 'keyword', :name => :TRUE
    Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(keyword)
    @token.convert(stub('lexer'), "true").should == [Puppet::Parser::Lexer::TOKENS[:BOOLEAN], true]
  end

  it "should return the BOOLEAN token and 'false' if the matched term is the string 'false'" do
    keyword = stub 'keyword', :name => :FALSE
    Puppet::Parser::Lexer::KEYWORDS.expects(:lookup).returns(keyword)
    @token.convert(stub('lexer'), "false").should == [Puppet::Parser::Lexer::TOKENS[:BOOLEAN], false]
  end

  it "should match against lower-case alpha-numeric terms separated by double colons" do
    @token.regex.should =~ "one::two"
  end

  it "should match against many lower-case alpha-numeric terms separated by double colons" do
    @token.regex.should =~ "one::two::three::four::five"
  end

  it "should match against lower-case alpha-numeric terms prefixed by double colons" do
    @token.regex.should =~ "::one"
  end

  it "should match against nested terms starting with numbers" do
    @token.regex.should =~ "::1one::2two::3three"
  end
end

describe Puppet::Parser::Lexer::TOKENS[:NUMBER] do
  before do
    @token = Puppet::Parser::Lexer::TOKENS[:NUMBER]
    @regex = @token.regex
  end

  it "should match against numeric terms" do
    @regex.should =~ "2982383139"
  end

  it "should match against float terms" do
    @regex.should =~ "29823.235"
  end

  it "should match against hexadecimal terms" do
    @regex.should =~ "0xBEEF0023"
  end

  it "should match against float with exponent terms" do
    @regex.should =~ "10e23"
  end

  it "should match against float terms with negative exponents" do
    @regex.should =~ "10e-23"
  end

  it "should match against float terms with fractional parts and exponent" do
    @regex.should =~ "1.234e23"
  end

  it "should return the NAME token and the value" do
"myval"] end end describe Puppet::Parser::Lexer::TOKENS[:COMMENT] do before { @token = Puppet::Parser::Lexer::TOKENS[:COMMENT] } it "should match against lines starting with '#'" do @token.regex.should =~ "# this is a comment" end it "should be marked to get skipped" do @token.skip?.should be_true end it "should be marked to accumulate" do @token.accumulate?.should be_true end it "'s block should return the comment without the #" do @token.convert(@lexer,"# this is a comment")[1].should == "this is a comment" end end describe Puppet::Parser::Lexer::TOKENS[:MLCOMMENT] do before do @token = Puppet::Parser::Lexer::TOKENS[:MLCOMMENT] @lexer = stub 'lexer', :line => 0 end it "should match against lines enclosed with '/*' and '*/'" do @token.regex.should =~ "/* this is a comment */" end it "should match multiple lines enclosed with '/*' and '*/'" do @token.regex.should =~ """/* this is a comment */""" end it "should increase the lexer current line number by the amount of lines spanned by the comment" do @lexer.expects(:line=).with(2) @token.convert(@lexer, "1\n2\n3") end it "should not greedily match comments" do match = @token.regex.match("/* first */ word /* second */") match[1].should == " first " end it "should be marked to accumulate" do @token.accumulate?.should be_true end it "'s block should return the comment without the comment marks" do @lexer.stubs(:line=).with(0) @token.convert(@lexer,"/* this is a comment */")[1].should == "this is a comment" end end describe Puppet::Parser::Lexer::TOKENS[:RETURN] do before { @token = Puppet::Parser::Lexer::TOKENS[:RETURN] } it "should match against carriage returns" do @token.regex.should =~ "\n" end it "should be marked to initiate text skipping" do @token.skip_text.should be_true end it "should be marked to increment the line" do @token.incr_line.should be_true end end shared_examples_for "handling `-` in standard variable names" do |prefix| # Watch out - a regex might match a *prefix* on these, not just the whole # word, so make sure you don't have false positive or negative results based # on that. 
  legal = %w{f foo f::b foo::b f::bar foo::bar 3 foo3 3foo}
  illegal = %w{f- f-o -f f::-o f::o- f::o-o}

  ["", "::"].each do |global_scope|
    legal.each do |name|
      var = prefix + global_scope + name
      it "should accept #{var.inspect} as a valid variable name" do
        (subject.regex.match(var) || [])[0].should == var
      end
    end

    illegal.each do |name|
      var = prefix + global_scope + name
      it "when `variable_with_dash` is disabled it should NOT accept #{var.inspect} as a valid variable name" do
        Puppet[:allow_variables_with_dashes] = false
        (subject.regex.match(var) || [])[0].should_not == var
      end

      it "when `variable_with_dash` is enabled it should NOT accept #{var.inspect} as a valid variable name" do
        Puppet[:allow_variables_with_dashes] = true
        (subject.regex.match(var) || [])[0].should_not == var
      end
    end
  end
end

describe Puppet::Parser::Lexer::TOKENS[:DOLLAR_VAR] do
  its(:skip_text) { should be_false }
  its(:incr_line) { should be_false }

  it_should_behave_like "handling `-` in standard variable names", '$'
end

describe Puppet::Parser::Lexer::TOKENS[:VARIABLE] do
  its(:skip_text) { should be_false }
  its(:incr_line) { should be_false }

  it_should_behave_like "handling `-` in standard variable names", ''
end

describe "the horrible deprecation / compatibility variables with dashes" do
  NamesWithDashes = %w{f- f-o -f f::-o f::o- f::o-o}

  { Puppet::Parser::Lexer::TOKENS[:DOLLAR_VAR_WITH_DASH] => '$',
    Puppet::Parser::Lexer::TOKENS[:VARIABLE_WITH_DASH] => ''
  }.each do |token, prefix|
    describe token do
      its(:skip_text) { should be_false }
      its(:incr_line) { should be_false }

      context "when compatibility is disabled" do
        before :each do
          Puppet[:allow_variables_with_dashes] = false
        end

        Puppet::Parser::Lexer::TOKENS.each do |name, value|
          it "should be unacceptable after #{name}" do
            token.acceptable?(:after => name).should be_false
          end
        end

        # Yes, this should still *match*, just not be acceptable.
        NamesWithDashes.each do |name|
          ["", "::"].each do |global_scope|
            var = prefix + global_scope + name
            it "should match #{var.inspect}" do
              subject.regex.match(var).to_a.should == [var]
            end
          end
        end
      end

      context "when compatibility is enabled" do
        before :each do
          Puppet[:allow_variables_with_dashes] = true
        end

        it "should be acceptable after DQPRE" do
          token.acceptable?(:after => :DQPRE).should be_true
        end

        NamesWithDashes.each do |name|
          ["", "::"].each do |global_scope|
            var = prefix + global_scope + name
            it "should match #{var.inspect}" do
              subject.regex.match(var).to_a.should == [var]
            end
          end
        end
      end
    end
  end

  context "deprecation warnings" do
    before :each do
      Puppet[:allow_variables_with_dashes] = true
    end

    it "should match a top level variable" do
      Puppet.expects(:deprecation_warning).once

      tokens_scanned_from('$foo-bar').should == [
        [:VARIABLE, { :value => 'foo-bar', :line => 1 }]
      ]
    end

    it "does not warn about a variable without a dash" do
      Puppet.expects(:deprecation_warning).never

      tokens_scanned_from('$c').should == [
        [:VARIABLE, { :value => "c", :line => 1 }]
      ]
    end

    it "does not warn about referencing a class name that contains a dash" do
      Puppet.expects(:deprecation_warning).never

      tokens_scanned_from('foo-bar').should == [
        [:NAME, { :value => "foo-bar", :line => 1 }]
      ]
    end

    it "warns about reference to variable" do
      Puppet.expects(:deprecation_warning).once

      tokens_scanned_from('$::foo-bar::baz-quux').should == [
        [:VARIABLE, { :value => "::foo-bar::baz-quux", :line => 1 }]
      ]
    end

    it "warns about reference to variable interpolated in a string" do
      Puppet.expects(:deprecation_warning).once

      tokens_scanned_from('"$::foo-bar::baz-quux"').should == [
        [:DQPRE, { :value => "", :line => 1 }],
        [:VARIABLE, { :value => "::foo-bar::baz-quux", :line => 1 }],
        [:DQPOST, { :value => "", :line => 1 }],
      ]
    end

    it "warns about reference to variable interpolated in a string as an expression" do
      Puppet.expects(:deprecation_warning).once

      tokens_scanned_from('"${::foo-bar::baz-quux}"').should == [
        [:DQPRE, { :value => "", :line => 1 }],
        [:VARIABLE, { :value => "::foo-bar::baz-quux", :line => 1 }],
        [:DQPOST, { :value => "", :line => 1 }],
      ]
    end
  end
end

describe Puppet::Parser::Lexer,"when lexing strings" do
  {
    %q{'single quoted string')} => [[:STRING,'single quoted string']],
    %q{"double quoted string"} => [[:STRING,'double quoted string']],
    %q{'single quoted string with an escaped "\\'"'} => [[:STRING,'single quoted string with an escaped "\'"']],
    %q{'single quoted string with an escaped "\$"'} => [[:STRING,'single quoted string with an escaped "\$"']],
    %q{'single quoted string with an escaped "\."'} => [[:STRING,'single quoted string with an escaped "\."']],
    %q{'single quoted string with an escaped "\r\n"'} => [[:STRING,'single quoted string with an escaped "\r\n"']],
    %q{'single quoted string with an escaped "\n"'} => [[:STRING,'single quoted string with an escaped "\n"']],
    %q{'single quoted string with an escaped "\\\\"'} => [[:STRING,'single quoted string with an escaped "\\\\"']],
    %q{"string with an escaped '\\"'"} => [[:STRING,"string with an escaped '\"'"]],
    %q{"string with an escaped '\\$'"} => [[:STRING,"string with an escaped '$'"]],
    %Q{"string with a line ending with a backslash: \\\nfoo"} => [[:STRING,"string with a line ending with a backslash: foo"]],
    %q{"string with $v (but no braces)"} => [[:DQPRE,"string with "],[:VARIABLE,'v'],[:DQPOST,' (but no braces)']],
    %q["string with ${v} in braces"] => [[:DQPRE,"string with "],[:VARIABLE,'v'],[:DQPOST,' in braces']],
[[:DQPRE,"string with "],[:VARIABLE,'qualified::var'],[:DQPOST,' in braces']], %q{"string with $v and $v (but no braces)"} => [[:DQPRE,"string with "],[:VARIABLE,"v"],[:DQMID," and "],[:VARIABLE,"v"],[:DQPOST," (but no braces)"]], %q["string with ${v} and ${v} in braces"] => [[:DQPRE,"string with "],[:VARIABLE,"v"],[:DQMID," and "],[:VARIABLE,"v"],[:DQPOST," in braces"]], %q["string with ${'a nested single quoted string'} inside it."] => [[:DQPRE,"string with "],[:STRING,'a nested single quoted string'],[:DQPOST,' inside it.']], %q["string with ${['an array ',$v2]} in it."] => [[:DQPRE,"string with "],:LBRACK,[:STRING,"an array "],:COMMA,[:VARIABLE,"v2"],:RBRACK,[:DQPOST," in it."]], %q{a simple "scanner" test} => [[:NAME,"a"],[:NAME,"simple"], [:STRING,"scanner"],[:NAME,"test"]], %q{a simple 'single quote scanner' test} => [[:NAME,"a"],[:NAME,"simple"], [:STRING,"single quote scanner"],[:NAME,"test"]], %q{a harder 'a $b \c"'} => [[:NAME,"a"],[:NAME,"harder"], [:STRING,'a $b \c"']], %q{a harder "scanner test"} => [[:NAME,"a"],[:NAME,"harder"], [:STRING,"scanner test"]], %q{a hardest "scanner \"test\""} => [[:NAME,"a"],[:NAME,"hardest"],[:STRING,'scanner "test"']], %Q{a hardestest "scanner \\"test\\"\n"} => [[:NAME,"a"],[:NAME,"hardestest"],[:STRING,%Q{scanner "test"\n}]], %q{function("call")} => [[:NAME,"function"],[:LPAREN,"("],[:STRING,'call'],[:RPAREN,")"]], %q["string with ${(3+5)/4} nested math."] => [[:DQPRE,"string with "],:LPAREN,[:NAME,"3"],:PLUS,[:NAME,"5"],:RPAREN,:DIV,[:NAME,"4"],[:DQPOST," nested math."]], %q["$$$$"] => [[:STRING,"$$$$"]], %q["$variable"] => [[:DQPRE,""],[:VARIABLE,"variable"],[:DQPOST,""]], %q["$var$other"] => [[:DQPRE,""],[:VARIABLE,"var"],[:DQMID,""],[:VARIABLE,"other"],[:DQPOST,""]], %q["foo$bar$"] => [[:DQPRE,"foo"],[:VARIABLE,"bar"],[:DQPOST,"$"]], %q["foo$$bar"] => [[:DQPRE,"foo$"],[:VARIABLE,"bar"],[:DQPOST,""]], %q[""] => [[:STRING,""]], %q["123 456 789 0"] => [[:STRING,"123 456 789 0"]], %q["${123} 456 $0"] => [[:DQPRE,""],[:VARIABLE,"123"],[:DQMID," 456 "],[:VARIABLE,"0"],[:DQPOST,""]], %q["$foo::::bar"] => [[:DQPRE,""],[:VARIABLE,"foo"],[:DQPOST,"::::bar"]] }.each { |src,expected_result| it "should handle #{src} correctly" do tokens_scanned_from(src).should be_like(*expected_result) end } end describe Puppet::Parser::Lexer::TOKENS[:DOLLAR_VAR] do before { @token = Puppet::Parser::Lexer::TOKENS[:DOLLAR_VAR] } it "should match against alpha words prefixed with '$'" do @token.regex.should =~ '$this_var' end it "should return the VARIABLE token and the variable name stripped of the '$'" do @token.convert(stub("lexer"), "$myval").should == [Puppet::Parser::Lexer::TOKENS[:VARIABLE], "myval"] end end describe Puppet::Parser::Lexer::TOKENS[:REGEX] do before { @token = Puppet::Parser::Lexer::TOKENS[:REGEX] } it "should match against any expression enclosed in //" do @token.regex.should =~ '/this is a regex/' end it 'should not match if there is \n in the regex' do @token.regex.should_not =~ "/this is \n a regex/" end describe "when scanning" do it "should not consider escaped slashes to be the end of a regex" do tokens_scanned_from("$x =~ /this \\/ foo/").should be_like(__,__,[:REGEX,%r{this / foo}]) end it "should not lex chained division as a regex" do tokens_scanned_from("$x = $a/$b/$c").collect { |name, data| name }.should_not be_include( :REGEX ) end it "should accept a regular expression after NODE" do tokens_scanned_from("node /www.*\.mysite\.org/").should be_like(__,[:REGEX,Regexp.new("www.*\.mysite\.org")]) end it "should accept regular expressions 
in a CASE" do s = %q{case $variable { "something": {$othervar = 4096 / 2} /regex/: {notice("this notably sucks")} } } tokens_scanned_from(s).should be_like( :CASE,:VARIABLE,:LBRACE,:STRING,:COLON,:LBRACE,:VARIABLE,:EQUALS,:NAME,:DIV,:NAME,:RBRACE,[:REGEX,/regex/],:COLON,:LBRACE,:NAME,:LPAREN,:STRING,:RPAREN,:RBRACE,:RBRACE ) end end it "should return the REGEX token and a Regexp" do @token.convert(stub("lexer"), "/myregex/").should == [Puppet::Parser::Lexer::TOKENS[:REGEX], Regexp.new(/myregex/)] end end describe Puppet::Parser::Lexer, "when lexing comments" do before { @lexer = Puppet::Parser::Lexer.new } it "should accumulate token in munge_token" do token = stub 'token', :skip => true, :accumulate? => true, :incr_line => nil, :skip_text => false token.stubs(:convert).with(@lexer, "# this is a comment").returns([token, " this is a comment"]) @lexer.munge_token(token, "# this is a comment") @lexer.munge_token(token, "# this is a comment") @lexer.getcomment.should == " this is a comment\n this is a comment\n" end it "should add a new comment stack level on LBRACE" do @lexer.string = "{" @lexer.expects(:commentpush) @lexer.fullscan end it "should add a new comment stack level on LPAREN" do @lexer.string = "(" @lexer.expects(:commentpush) @lexer.fullscan end it "should pop the current comment on RPAREN" do @lexer.string = ")" @lexer.expects(:commentpop) @lexer.fullscan end it "should return the current comments on getcomment" do @lexer.string = "# comment" @lexer.fullscan @lexer.getcomment.should == "comment\n" end it "should discard the previous comments on blank line" do @lexer.string = "# 1\n\n# 2" @lexer.fullscan @lexer.getcomment.should == "2\n" end it "should skip whitespace before lexing the next token after a non-token" do tokens_scanned_from("/* 1\n\n */ \ntest").should be_like([:NAME, "test"]) end it "should not return comments seen after the current line" do @lexer.string = "# 1\n\n# 2" @lexer.fullscan @lexer.getcomment(1).should == "" end it "should return a comment seen before the current line" do @lexer.string = "# 1\n# 2" @lexer.fullscan @lexer.getcomment(2).should == "1\n2\n" end end # FIXME: We need to rewrite all of these tests, but I just don't want to take the time right now. 
describe "Puppet::Parser::Lexer in the old tests" do before { @lexer = Puppet::Parser::Lexer.new } it "should do simple lexing" do { %q{\\} => [[:BACKSLASH,"\\"]], %q{simplest scanner test} => [[:NAME,"simplest"],[:NAME,"scanner"],[:NAME,"test"]], %Q{returned scanner test\n} => [[:NAME,"returned"],[:NAME,"scanner"],[:NAME,"test"]] }.each { |source,expected| tokens_scanned_from(source).should be_like(*expected) } end it "should fail usefully" do expect { tokens_scanned_from('^') }.to raise_error(RuntimeError) end it "should fail if the string is not set" do expect { @lexer.fullscan }.to raise_error(Puppet::LexError) end it "should correctly identify keywords" do tokens_scanned_from("case").should be_like([:CASE, "case"]) end it "should correctly parse class references" do %w{Many Different Words A Word}.each { |t| tokens_scanned_from(t).should be_like([:CLASSREF,t])} end # #774 it "should correctly parse namespaced class refernces token" do %w{Foo ::Foo Foo::Bar ::Foo::Bar}.each { |t| tokens_scanned_from(t).should be_like([:CLASSREF, t]) } end it "should correctly parse names" do %w{this is a bunch of names}.each { |t| tokens_scanned_from(t).should be_like([:NAME,t]) } end it "should correctly parse names with numerals" do %w{1name name1 11names names11}.each { |t| tokens_scanned_from(t).should be_like([:NAME,t]) } end it "should correctly parse empty strings" do expect { tokens_scanned_from('$var = ""') }.to_not raise_error end it "should correctly parse virtual resources" do tokens_scanned_from("@type {").should be_like([:AT, "@"], [:NAME, "type"], [:LBRACE, "{"]) end it "should correctly deal with namespaces" do @lexer.string = %{class myclass} @lexer.fullscan @lexer.namespace.should == "myclass" @lexer.namepop @lexer.namespace.should == "" @lexer.string = "class base { class sub { class more" @lexer.fullscan @lexer.namespace.should == "base::sub::more" @lexer.namepop @lexer.namespace.should == "base::sub" end it "should not put class instantiation on the namespace" do @lexer.string = "class base { class sub { class { mode" @lexer.fullscan @lexer.namespace.should == "base::sub" end it "should correctly handle fully qualified names" do @lexer.string = "class base { class sub::more {" @lexer.fullscan @lexer.namespace.should == "base::sub::more" @lexer.namepop @lexer.namespace.should == "base" end it "should correctly lex variables" do ["$variable", "$::variable", "$qualified::variable", "$further::qualified::variable"].each do |string| tokens_scanned_from(string).should be_like([:VARIABLE,string.sub(/^\$/,'')]) end end it "should end variables at `-`" do tokens_scanned_from('$hyphenated-variable'). 
      should be_like [:VARIABLE, "hyphenated"], [:MINUS, '-'], [:NAME, 'variable']
  end

  it "should not include whitespace in a variable" do
    tokens_scanned_from("$foo bar").should_not be_like([:VARIABLE, "foo bar"])
  end

  it "should not include excess colons in a variable" do
    tokens_scanned_from("$foo::::bar").should_not be_like([:VARIABLE, "foo::::bar"])
  end
end

describe 'Puppet::Parser::Lexer handles reserved words' do
  ['function', 'private', 'attr', 'type'].each do |reserved_bare_word|
-    it "by warning if '#{reserved_bare_word}' is used as bare word" do
-      Puppet.expects(:deprecation_warning).once
+    it "by delivering '#{reserved_bare_word}' as a bare word" do
      expect(tokens_scanned_from(reserved_bare_word)).to eq([[:NAME, {:value=>reserved_bare_word, :line => 1}]])
    end
  end
end

describe "Puppet::Parser::Lexer in the old tests when lexing example files" do
  my_fixtures('*.pp') do |file|
    it "should correctly lex #{file}" do
      lexer = Puppet::Parser::Lexer.new
      lexer.file = file
      expect { lexer.fullscan }.to_not raise_error
    end
  end
end

describe "when trying to lex a non-existent file" do
  include PuppetSpec::Files

  it "should return an empty list of tokens" do
    lexer = Puppet::Parser::Lexer.new
    lexer.file = nofile = tmpfile('lexer')
    Puppet::FileSystem.exist?(nofile).should == false

    lexer.fullscan.should == [[false,false]]
  end
end
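
A minimal usage sketch, appended for reviewers and not part of the patch: it drives the lexer the same way the specs' tokens_scanned_from helper does -- assign a source string, fullscan it, and drop the trailing [false, false] end-of-input marker. The class and methods are the ones patched above; the manifest snippet itself is a made-up example.

require 'puppet'
require 'puppet/parser/lexer'

lexer = Puppet::Parser::Lexer.new
lexer.string = '$greeting = "hello, $name"'  # hypothetical input, not taken from the diff

# fullscan yields [TOKEN_NAME, { :value => ..., :line => ... }] pairs and
# terminates with [false, false], which the [0..-2] range drops.
lexer.fullscan[0..-2].each do |name, data|
  puts "#{name}: #{data[:value].inspect} (line #{data[:line]})"
end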