diff --git a/lib/puppet/pops/model/model.rb b/lib/puppet/pops/model/model.rb index 9e2a079b4..f8dcd1f4d 100644 --- a/lib/puppet/pops/model/model.rb +++ b/lib/puppet/pops/model/model.rb @@ -1,114 +1,114 @@ # # The Puppet Pops Metamodel Implementation # # The Puppet Pops Metamodel consists of two parts; the metamodel expressed with RGen in model_meta.rb, # and this file which mixes in implementation details. # require 'rgen/metamodel_builder' require 'rgen/ecore/ecore' require 'rgen/ecore/ecore_ext' require 'rgen/ecore/ecore_to_ruby' module Puppet::Pops require 'puppet/pops/model/model_meta' # TODO: See PUP-2978 for possible performance optimization # Mix in implementation into the generated code module Model class PopsObject include Puppet::Pops::Visitable include Puppet::Pops::Adaptable include Puppet::Pops::Containment end class LocatableExpression module ClassModule # Go through the gymnastics of making either value or pattern settable # with synchronization to the other form. A derived value cannot be serialized # and we want to serialize the pattern. When recreating the object we need to # recreate it from the pattern string. # The below sets both values if one is changed. # def locator unless result = getLocator setLocator(result = Puppet::Pops::Parser::Locator.locator(source_text, source_ref(), line_offsets)) end result end end end class SubLocatedExpression module ClassModule def locator unless result = getLocator # Adapt myself to get the Locator for me adapter = Puppet::Pops::Adapters::SourcePosAdapter.adapt(self) # Get the program (root), and deal with case when not contained in a program program = eAllContainers.find {|c| c.is_a?(Program) } source_ref = program.nil? ? '' : program.source_ref # An outer locator is needed since SubLocator only deals with offsets. This outer locator # has 0,0 as origin. 
outer_locator = Puppet::Pops::Parser::Locator.locator(adpater.extract_text, source_ref, line_offsets) # Create a sublocator that describes an offset from the outer # NOTE: the offset of self is the same as the sublocator's leading_offset result = Puppet::Pops::Parser::Locator::SubLocator.new(outer_locator, leading_line_count, offset, leading_line_offset) setLocator(result) end result end end end class LiteralRegularExpression module ClassModule # Go through the gymnastics of making either value or pattern settable # with synchronization to the other form. A derived value cannot be serialized # and we want to serialize the pattern. When recreating the object we need to # recreate it from the pattern string. # The below sets both values if one is changed. # def value= regexp setValue regexp setPattern regexp.to_s end def pattern= regexp_string setPattern regexp_string setValue Regexp.new(regexp_string) end end end class AbstractResource module ClassModule def virtual_derived form == :virtual || form == :exported end def exported_derived form == :exported end end end class Program < PopsObject module ClassModule def locator unless result = getLocator - setLocator(result = Puppet::Pops::Parser::Locator.locator(source_text, source_ref(), line_offsets)) + setLocator(result = Puppet::Pops::Parser::Locator.locator(source_text, source_ref(), line_offsets, char_offsets)) end result end end end end end diff --git a/lib/puppet/pops/model/model_meta.rb b/lib/puppet/pops/model/model_meta.rb index 246216aa9..788b6949d 100644 --- a/lib/puppet/pops/model/model_meta.rb +++ b/lib/puppet/pops/model/model_meta.rb @@ -1,576 +1,582 @@ # # The Puppet Pops Metamodel # # This module contains a formal description of the Puppet Pops (*P*uppet *OP*eration instruction*S*). # It describes a Metamodel containing DSL instructions, a description of PuppetType and related # classes needed to evaluate puppet logic. 
# The metamodel resembles the existing AST model, but it is a semantic model of instructions and # the types that they operate on rather than an Abstract Syntax Tree, although closely related. # # The metamodel is anemic (has no behavior) except basic datatype and type # assertions and reference/containment assertions. # The metamodel is also a generalized description of the Puppet DSL to enable the # same metamodel to be used to express Puppet DSL models (instances) with different semantics as # the language evolves. # # The metamodel is concretized by a validator for a particular version of # the Puppet DSL language. # # This metamodel is expressed using RGen. # require 'rgen/metamodel_builder' module Puppet::Pops::Model extend RGen::MetamodelBuilder::ModuleExtension # The abstract base class for modeled objects. The Visitable, Adaptable and Containment behavior # is mixed into this class by model.rb. # class PopsObject < RGen::MetamodelBuilder::MMBase abstract end # A Positioned object has an offset measured in an opaque unit (representing characters) from the start # of a source text (starting # from 0), and a length measured in the same opaque unit. The resolution of the opaque unit requires the # aid of a Locator instance that knows about the measure. This information is stored in the model's # root node - a Program. # # The offset and length are optional if the source of the model is not from parsed text. # class Positioned < PopsObject abstract has_attr 'offset', Integer has_attr 'length', Integer end # @abstract base class for expressions class Expression < Positioned abstract end # A Nop - the "no op" expression. # @note not really needed since the evaluator can evaluate nil with the meaning of NoOp # @todo deprecate? May be useful if there is the need to differentiate between nil and Nop when transforming model. # class Nop < Expression end # A binary expression is abstract and has a left and a right expression. The order of evaluation # and semantics are determined by the concrete subclass. 
# class BinaryExpression < Expression abstract # # @!attribute [rw] left_expr # @return [Expression] contains_one_uni 'left_expr', Expression, :lowerBound => 1 # # @!attribute [rw] right_expr # @return [Expression] contains_one_uni 'right_expr', Expression, :lowerBound => 1 end # A unary expression is abstract and contains one expression. The semantics are determined by # a concrete subclass. # class UnaryExpression < Expression abstract contains_one_uni 'expr', Expression, :lowerBound => 1 end # A class that simply evaluates to the contained expression. # It is of value in order to preserve user entered parentheses in transformations, and # transformations from model to source. # class ParenthesizedExpression < UnaryExpression; end # A boolean not expression, reversing the truth of the unary expr. # class NotExpression < UnaryExpression; end # An arithmetic expression reversing the polarity of the numeric unary expr. # class UnaryMinusExpression < UnaryExpression; end # Unfolds an array (a.k.a 'splat') class UnfoldExpression < UnaryExpression; end OpAssignment = RGen::MetamodelBuilder::DataTypes::Enum.new( :literals => [:'=', :'+=', :'-='], :name => 'OpAssignment') # An assignment expression assigns a value to the lval() of the left_expr. # class AssignmentExpression < BinaryExpression has_attr 'operator', OpAssignment, :lowerBound => 1 end OpArithmetic = RGen::MetamodelBuilder::DataTypes::Enum.new( :literals => [:'+', :'-', :'*', :'%', :'/', :'<<', :'>>' ], :name => 'OpArithmetic') # An arithmetic expression applies an arithmetic operator on left and right expressions. 
# class ArithmeticExpression < BinaryExpression has_attr 'operator', OpArithmetic, :lowerBound => 1 end OpRelationship = RGen::MetamodelBuilder::DataTypes::Enum.new( :literals => [:'->', :'<-', :'~>', :'<~'], :name => 'OpRelationship') # A relationship expression associates the left and right expressions # class RelationshipExpression < BinaryExpression has_attr 'operator', OpRelationship, :lowerBound => 1 end # An access expression accesses the value(s) denoted by one or more keys in left_expr, i.e. typically # expressed concretely in a language as left[key, ...]. Note that this is not a BinaryExpression; it has # a left_expr and a list of keys. # class AccessExpression < Expression contains_one_uni 'left_expr', Expression, :lowerBound => 1 contains_many_uni 'keys', Expression, :lowerBound => 1 end OpComparison = RGen::MetamodelBuilder::DataTypes::Enum.new( :literals => [:'==', :'!=', :'<', :'>', :'<=', :'>=' ], :name => 'OpComparison') # A comparison expression compares left and right using a comparison operator. # class ComparisonExpression < BinaryExpression has_attr 'operator', OpComparison, :lowerBound => 1 end OpMatch = RGen::MetamodelBuilder::DataTypes::Enum.new( :literals => [:'!~', :'=~'], :name => 'OpMatch') # A match expression matches left and right using a matching operator. # class MatchExpression < BinaryExpression has_attr 'operator', OpMatch, :lowerBound => 1 end # An 'in' expression checks if left is 'in' right # class InExpression < BinaryExpression; end # A boolean expression applies a logical connective operator (and, or) to left and right expressions. # class BooleanExpression < BinaryExpression abstract end # An and expression applies the logical connective operator and to left and right expression # and does not evaluate the right expression if the left expression is false. 
# class AndExpression < BooleanExpression; end # An or expression applies the logical connective operator or to the left and right expression # and does not evaluate the right expression if the left expression is true # class OrExpression < BooleanExpression; end # A literal list / array containing 0:M expressions. # class LiteralList < Expression contains_many_uni 'values', Expression end # A Keyed entry has a key and a value expression. It is typically used as an entry in a Hash. # class KeyedEntry < Positioned contains_one_uni 'key', Expression, :lowerBound => 1 contains_one_uni 'value', Expression, :lowerBound => 1 end # A literal hash is a collection of KeyedEntry objects # class LiteralHash < Expression contains_many_uni 'entries', KeyedEntry end # A block contains a list of expressions # class BlockExpression < Expression contains_many_uni 'statements', Expression end # A case option entry in a CaseExpression; it associates one or more values with a then_expr. # class CaseOption < Expression contains_many_uni 'values', Expression, :lowerBound => 1 contains_one_uni 'then_expr', Expression, :lowerBound => 1 end # A case expression has a test, a list of options (multi values => block map). # One CaseOption may contain a LiteralDefault as value. This option will be picked if nothing # else matched. # class CaseExpression < Expression contains_one_uni 'test', Expression, :lowerBound => 1 contains_many_uni 'options', CaseOption end # A query expression is an expression that is applied to some collection. # The contained optional expression may contain different types of relational expressions depending # on what the query is applied to. # class QueryExpression < Expression abstract contains_one_uni 'expr', Expression, :lowerBound => 0 end # An exported query is a special form of query that searches for exported objects. # class ExportedQuery < QueryExpression end # A virtual query is a special form of query that searches for virtual objects. 
# class VirtualQuery < QueryExpression end OpAttribute = RGen::MetamodelBuilder::DataTypes::Enum.new( :literals => [:'=>', :'+>', ], :name => 'OpAttribute') # Common base class of AttributeOperation and AttributesOperation. # class AbstractAttributeOperation < Positioned end # An attribute operation sets or appends a value to a named attribute. # class AttributeOperation < AbstractAttributeOperation has_attr 'attribute_name', String, :lowerBound => 1 has_attr 'operator', OpAttribute, :lowerBound => 1 contains_one_uni 'value_expr', Expression, :lowerBound => 1 end # An attribute operation containing an expression that must evaluate to a Hash # class AttributesOperation < AbstractAttributeOperation contains_one_uni 'expr', Expression, :lowerBound => 1 end # An object that collects stored objects from the central cache and returns # them to the current host. Operations may optionally be applied. # class CollectExpression < Expression contains_one_uni 'type_expr', Expression, :lowerBound => 1 contains_one_uni 'query', QueryExpression, :lowerBound => 1 contains_many_uni 'operations', AttributeOperation end # A parameter has a name, a 'value' expression, a 'type_expr' expression, and a boolean # 'captures_rest' attribute. # class Parameter < Positioned has_attr 'name', String, :lowerBound => 1 contains_one_uni 'value', Expression contains_one_uni 'type_expr', Expression, :lowerBound => 0 has_attr 'captures_rest', Boolean end # Abstract base class for definitions. # class Definition < Expression abstract end # Abstract base class for named and parameterized definitions. class NamedDefinition < Definition abstract has_attr 'name', String, :lowerBound => 1 contains_many_uni 'parameters', Parameter contains_one_uni 'body', Expression end # A resource type definition (a 'define' in the DSL). # class ResourceTypeDefinition < NamedDefinition end # A node definition matches hosts using Strings, or Regular expressions. It may inherit from # a parent node (also using a String or Regular expression). 
# class NodeDefinition < Definition contains_one_uni 'parent', Expression contains_many_uni 'host_matches', Expression, :lowerBound => 1 contains_one_uni 'body', Expression end # An expression that holds its own index of line offsets and a transient locator. # class LocatableExpression < Expression has_many_attr 'line_offsets', Integer has_attr 'locator', Object, :lowerBound => 1, :transient => true end # Contains one expression which has offsets reported virtually (offset against the Program's # overall locator). # class SubLocatedExpression < Expression contains_one_uni 'expr', Expression, :lowerBound => 1 # line offset index for contained expressions has_many_attr 'line_offsets', Integer # Number of preceding lines (before the line_offsets) has_attr 'leading_line_count', Integer # The offset of the leading source line (i.e. size of "left margin"). has_attr 'leading_line_offset', Integer # The locator for the sub-locatable's children (not for the sublocator itself) # The locator is not serialized and is recreated on demand from the indexing information # in self. # has_attr 'locator', Object, :lowerBound => 1, :transient => true end # A heredoc is a wrapper around a LiteralString or a ConcatenatedString with a specification # of syntax. The expectation is that "syntax" has meaning to a validator. A syntax of nil or '' means # "unspecified syntax". # class HeredocExpression < Expression has_attr 'syntax', String contains_one_uni 'text_expr', Expression, :lowerBound => 1 end # A class definition # class HostClassDefinition < NamedDefinition has_attr 'parent_class', String end # i.e {|parameters| body } class LambdaExpression < Expression contains_many_uni 'parameters', Parameter contains_one_uni 'body', Expression end # If expression. If test is true, the then_expr part should be evaluated, else the (optional) # else_expr. An 'elsif' is simply an else_expr = IfExpression, and 'else' is simply else == Block. # a 'then' is typically a Block. 
# class IfExpression < Expression contains_one_uni 'test', Expression, :lowerBound => 1 contains_one_uni 'then_expr', Expression, :lowerBound => 1 contains_one_uni 'else_expr', Expression end # An if expression with boolean reversed test. # class UnlessExpression < IfExpression end # An abstract call. # class CallExpression < Expression abstract # A bit of a crutch; a function is either a procedure (void return) or has an rvalue; # this flag tells the evaluator that it is a failure to call a function that is void/procedure # where a value is expected. # has_attr 'rval_required', Boolean, :defaultValueLiteral => "false" contains_one_uni 'functor_expr', Expression, :lowerBound => 1 contains_many_uni 'arguments', Expression contains_one_uni 'lambda', Expression end # A function call where the functor_expr should evaluate to something callable. # class CallFunctionExpression < CallExpression; end # A function call where the given functor_expr should evaluate to the name # of a function. # class CallNamedFunctionExpression < CallExpression; end # A method/function call where the function expr is a NamedAccess and with support for # an optional lambda block # class CallMethodExpression < CallExpression end # Abstract base class for literals. # class Literal < Expression abstract end # A literal value is an abstract value holder. The type of the contained value is # determined by the concrete subclass. # class LiteralValue < Literal abstract end # A Regular Expression Literal. # class LiteralRegularExpression < LiteralValue has_attr 'value', Object, :lowerBound => 1, :transient => true has_attr 'pattern', String, :lowerBound => 1 end # A Literal String # class LiteralString < LiteralValue has_attr 'value', String, :lowerBound => 1 end # Abstract base class for literal numbers. # class LiteralNumber < LiteralValue abstract end # A literal integer has a radix of decimal (10), octal (8), or hex (16) to enable string conversion with the input radix. # By default, a radix of 10 is used. 
# class LiteralInteger < LiteralNumber has_attr 'radix', Integer, :lowerBound => 1, :defaultValueLiteral => "10" has_attr 'value', Integer, :lowerBound => 1 end # A literal floating point number. # class LiteralFloat < LiteralNumber has_attr 'value', Float, :lowerBound => 1 end # The DSL `undef`. # class LiteralUndef < Literal; end # The DSL `default` class LiteralDefault < Literal; end # DSL `true` or `false` class LiteralBoolean < LiteralValue has_attr 'value', Boolean, :lowerBound => 1 end # A text expression is an interpolation of an expression. If the embedded expression is # a QualifiedName, it is taken as a variable name and resolved. All other expressions are evaluated. # The result is transformed to a string. # class TextExpression < UnaryExpression; end # An interpolated/concatenated string. The contained segments are expressions. Verbatim sections # should be LiteralString instances, and interpolated expressions should either be # TextExpression instances (if QualifiedNames should be turned into variables), or any other expression # if such treatment is not needed. # class ConcatenatedString < Expression contains_many_uni 'segments', Expression end # A DSL NAME (one or multiple parts separated by '::'). # class QualifiedName < LiteralValue has_attr 'value', String, :lowerBound => 1 end # Represents a parsed reserved word class ReservedWord < LiteralValue has_attr 'word', String, :lowerBound => 1 end # A DSL CLASSREF (one or multiple parts separated by '::' where (at least) the first part starts with an upper case letter). # class QualifiedReference < LiteralValue has_attr 'value', String, :lowerBound => 1 end # A Variable expression looks up the value of expr (some kind of name) in scope. # The expression is typically a QualifiedName, or QualifiedReference. # class VariableExpression < UnaryExpression; end # Epp start class EppExpression < Expression # EPP can be specified without giving any parameter specification. 
# However, the parameters of the lambda in that case are the empty # array, which is the same as when the parameters are explicitly # specified as empty. This attribute tracks that difference. has_attr 'parameters_specified', Boolean contains_one_uni 'body', Expression end # A string to render class RenderStringExpression < LiteralString end # An expression to evaluate and render class RenderExpression < UnaryExpression end # A resource body describes one resource instance # class ResourceBody < Positioned contains_one_uni 'title', Expression contains_many_uni 'operations', AbstractAttributeOperation end ResourceFormEnum = RGen::MetamodelBuilder::DataTypes::Enum.new( :literals => [:regular, :virtual, :exported ], :name => 'ResourceFormEnum') # An abstract resource describes the form of the resource (regular, virtual or exported) # and adds convenience methods to ask if it is virtual or exported. # Not all derived classes support all forms; this needs to be validated. # class AbstractResource < Expression abstract has_attr 'form', ResourceFormEnum, :lowerBound => 1, :defaultValueLiteral => "regular" has_attr 'virtual', Boolean, :derived => true has_attr 'exported', Boolean, :derived => true end # A resource expression is used to instantiate one or many resources. Resources may optionally # be virtual or exported, an exported resource is always virtual. # class ResourceExpression < AbstractResource contains_one_uni 'type_name', Expression, :lowerBound => 1 contains_many_uni 'bodies', ResourceBody end # A resource defaults sets defaults for a resource type. This class inherits from AbstractResource # but only supports the :regular form (this is intentional to be able to produce better error messages # when illegal forms are applied to a model). # class ResourceDefaultsExpression < AbstractResource contains_one_uni 'type_ref', Expression contains_many_uni 'operations', AbstractAttributeOperation end # A resource override overrides already set values. 
# class ResourceOverrideExpression < AbstractResource contains_one_uni 'resources', Expression, :lowerBound => 1 contains_many_uni 'operations', AbstractAttributeOperation end # A selector entry describes a map from matching_expr to value_expr. # class SelectorEntry < Positioned contains_one_uni 'matching_expr', Expression, :lowerBound => 1 contains_one_uni 'value_expr', Expression, :lowerBound => 1 end # A selector expression represents a mapping from a left_expr to a matching SelectorEntry. # class SelectorExpression < Expression contains_one_uni 'left_expr', Expression, :lowerBound => 1 contains_many_uni 'selectors', SelectorEntry end # A named access expression looks up a named part. (e.g. $a.b) # class NamedAccessExpression < BinaryExpression; end # A Program is the top level construct returned by the parser # it contains the parsed result in the body, and has a reference to the full source text, - # and its origin. The line_offset's is an array with the start offset of each line. + # and its origin. The line_offsets attribute is an array with the start offset of each line measured + # in bytes or characters (as given by the attribute char_offsets). The `char_offsets` setting + # applies to all offsets recorded in the model (not just the line_offsets). # + # A model that will be shared across different platforms should use char_offsets true as the byte + # offsets are platform and encoding dependent. 
+ # class Program < PopsObject contains_one_uni 'body', Expression has_many 'definitions', Definition has_attr 'source_text', String has_attr 'source_ref', String has_many_attr 'line_offsets', Integer + has_attr 'char_offsets', Boolean, :defaultValueLiteral => 'false' has_attr 'locator', Object, :lowerBound => 1, :transient => true end end diff --git a/lib/puppet/pops/parser/locator.rb b/lib/puppet/pops/parser/locator.rb index c46c38ee9..b62b69cbc 100644 --- a/lib/puppet/pops/parser/locator.rb +++ b/lib/puppet/pops/parser/locator.rb @@ -1,291 +1,295 @@ # Helper class that keeps track of where line breaks are located and can answer questions about positions. # class Puppet::Pops::Parser::Locator RUBY_1_9_3 = (1 << 16 | 9 << 8 | 3) RUBY_2_0_0 = (2 << 16 | 0 << 8 | 0) RUBYVER_ARRAY = RUBY_VERSION.split(".").collect {|s| s.to_i } RUBYVER = (RUBYVER_ARRAY[0] << 16 | RUBYVER_ARRAY[1] << 8 | RUBYVER_ARRAY[2]) # Computes a symbol representing which ruby runtime this is running on # This implementation will fail if there are more than 255 minor or micro versions of ruby # def self.locator_version if RUBYVER >= RUBY_2_0_0 :ruby20 elsif RUBYVER >= RUBY_1_9_3 :ruby19 else :ruby18 end end LOCATOR_VERSION = locator_version # Constant set to true if multibyte is supported (includes multibyte extended regular expressions) MULTIBYTE = !!(LOCATOR_VERSION == :ruby19 || LOCATOR_VERSION == :ruby20) # Creates, or recreates a Locator. A Locator is created if index is not given (a scan is then # performed of the given source string). # If char_offsets is true, the returned locator treats all given offsets as character offsets. 
# - def self.locator(string, file, index = nil) - case LOCATOR_VERSION - when :ruby20, :ruby19 - Locator19.new(string, file, index) + def self.locator(string, file, index = nil, char_offsets = false) + if char_offsets + LocatorForChars.new(string, file, index) else - Locator18.new(string, file, index) + case LOCATOR_VERSION + when :ruby20, :ruby19 + Locator19.new(string, file, index) + else + LocatorForChars.new(string, file, index) + end end end # Returns the file name associated with the string content def file end # Returns the string content def string end # Returns the position on line (first position on a line is 1) def pos_on_line(offset) end # Returns the line number (first line is 1) for the given offset def line_for_offset(offset) end # Returns the offset on line (first offset on a line is 0). # def offset_on_line(offset) end # Returns the character offset for a given reported offset def char_offset(byte_offset) end # Returns the length measured in number of characters from the given start and end reported offset def char_length(offset, end_offset) end # Returns the line index - an array of line offsets for the start position of each line, starting at 0 for # the first line. # def line_index() end # A Sublocator locates a concrete locator (subspace) in a virtual space. # The `leading_line_count` is the (virtual) number of lines preceding the first line in the concrete locator. # The `leading_offset` is the (virtual) byte offset of the first byte in the concrete locator. # The `leading_line_offset` is the (virtual) offset / margin in characters for each line. # # This illustrates characters in the sublocator (`.`) inside the subspace (`X`): # # 1:XXXXXXXX # 2:XXXX.... .. ... .. # 3:XXXX. . .... .. # 4:XXXX............ # # This sublocator would be configured with leading_line_count = 1, # leading_offset=8, and leading_line_offset=4 # # Note that leading_offset must be the same for all lines and measured in characters. 
# class SubLocator < Puppet::Pops::Parser::Locator attr_reader :locator attr_reader :leading_line_count attr_reader :leading_offset attr_reader :leading_line_offset def self.sub_locator(string, file, leading_line_count, leading_offset, leading_line_offset) self.new(Puppet::Pops::Parser::Locator.locator(string, file), leading_line_count, leading_offset, leading_line_offset) end def initialize(locator, leading_line_count, leading_offset, leading_line_offset) @locator = locator @leading_line_count = leading_line_count @leading_offset = leading_offset @leading_line_offset = leading_line_offset end def file @locator.file end def string @locator.string end # Given offset is offset in the subspace def line_for_offset(offset) @locator.line_for_offset(offset) + @leading_line_count end # Given offset is offset in the subspace def offset_on_line(offset) @locator.offset_on_line(offset) + @leading_line_offset end # Given offset is offset in the subspace def char_offset(offset) effective_line = @locator.line_for_offset(offset) locator.char_offset(offset) + (effective_line * @leading_line_offset) + @leading_offset end # Given offsets are offsets in the subspace def char_length(offset, end_offset) effective_line = @locator.line_for_offset(end_offset) - @locator.line_for_offset(offset) locator.char_length(offset, end_offset) + (effective_line * @leading_line_offset) end def pos_on_line(offset) offset_on_line(offset) +1 end end private class AbstractLocator < Puppet::Pops::Parser::Locator attr_accessor :line_index attr_accessor :string attr_accessor :prev_offset attr_accessor :prev_line attr_reader :file # Creates a locator for the given content string and file. Note that the given string and file are # frozen (they are not copied). If an index (an array with the start offset of each line) is given it # is used as is, otherwise the line index is computed by scanning the string. # def initialize(string, file, index = nil) @string = string.freeze @file = file.freeze @prev_offset = nil @prev_line = nil @line_index = index - compute_line_index unless !index.nil? + compute_line_index if index.nil? 
 end # Returns the position on line (first position on a line is 1) def pos_on_line(offset) offset_on_line(offset) +1 end # Returns a hash with :line, :pos, :offset and :length describing the region between the two given reported offsets. # def to_location_hash(reported_offset, end_offset) pos = pos_on_line(reported_offset) offset = char_offset(reported_offset) length = char_length(reported_offset, end_offset) start_line = line_for_offset(reported_offset) { :line => start_line, :pos => pos, :offset => offset, :length => length} end # Returns the index of the smallest item for which the item > the given value # This is a min binary search. Although written in Ruby it is only slightly slower than # the corresponding method in C in Ruby 2.0.0 - the main benefit to use this method over # the Ruby C version is that it returns the index (not the value) which means there is no need # to have an additional structure to get the index (or record the index in the structure). This # saves both memory and CPU. It also does not require passing a block that is called since this # method is specialized to search the line index. 
# def ary_bsearch_i(ary, value) low = 0 high = ary.length mid = nil smaller = false satisfied = false v = nil while low < high do mid = low + ((high - low) / 2) v = (ary[mid] > value) if v == true satisfied = true smaller = true elsif !v smaller = false else raise TypeError, "wrong argument, must be boolean or nil, got '#{v.class}'" end if smaller high = mid else low = mid + 1; end end return nil if low == ary.length return nil if !satisfied return low end # Common impl for 18 and 19 since scanner is byte based def compute_line_index scanner = StringScanner.new(string) result = [0] # first line starts at 0 while scanner.scan_until(/\n/) result << scanner.pos end self.line_index = result.freeze end # Returns the line number (first line is 1) for the given offset. If the offset is after the start of # the last line, the number of lines (line_index.size) is returned. # NOTE(review): prev_offset and prev_line are never assigned by this method. The earlier # `prev_offset = ...` / `prev_line = ...` statements created method-local variables and had no effect, # so they have been removed. The guard below is kept so behavior is unchanged: unless a caller sets the # attributes through the accessors it only matches a nil offset, for which nil is returned. # Enabling the cache (self.prev_offset = ...) would make a nil offset raise after a cached lookup. # def line_for_offset(offset) return prev_line if prev_offset == offset line_nbr = ary_bsearch_i(line_index, offset) line_nbr || line_index.size end end - class Locator18 < AbstractLocator + # A locator for offsets that are already measured in characters; offsets and lengths are reported as given. + # NOTE(review): the inherited compute_line_index records StringScanner#pos, which is a byte position. + # When no index is given the computed line index is only correct for single-byte text - confirm that callers + # using character offsets always supply an index. + class LocatorForChars < AbstractLocator def offset_on_line(offset) line_offset = line_index[ line_for_offset(offset)-1 ] offset - line_offset end def char_offset(char_offset) char_offset end def char_length(offset, end_offset) end_offset - offset end end # This implementation is for Ruby19 and Ruby20. It uses byteslice to get strings from byte based offsets. # For Ruby20 this is faster than using the Stringscanner.charpos method (byteslice outperforms it, when # strings are frozen). # class Locator19 < AbstractLocator # Returns the offset on line (first offset on a line is 0). 
# Ruby 19 is multibyte but has no character position methods, must use byteslice def offset_on_line(offset) line_offset = line_index[ line_for_offset(offset)-1 ] string.byteslice(line_offset, offset-line_offset).length end # Returns the character offset for a given byte offset # Ruby 19 is multibyte but has no character position methods, must use byteslice def char_offset(byte_offset) string.byteslice(0, byte_offset).length end # Returns the length measured in number of characters from the given start and end byte offsets def char_length(offset, end_offset) string.byteslice(offset, end_offset - offset).length end end end