diff --git a/benchmarks/catalog_memory/benchmarker.rb b/benchmarks/catalog_memory/benchmarker.rb index 39d1552f6..2d706e58d 100644 --- a/benchmarks/catalog_memory/benchmarker.rb +++ b/benchmarks/catalog_memory/benchmarker.rb @@ -1,103 +1,115 @@ require 'erb' require 'ostruct' require 'fileutils' require 'json' # For memory debugging - if the core_ext is not loaded, things break inside mass # require 'mass' require 'objspace' + +# Only runs for Ruby > 2.1.0, and must do this early since ObjectSpace.trace_object_allocations_start must be called +# as early as possible. +# +RUBYVER_ARRAY = RUBY_VERSION.split(".").collect {|s| s.to_i } +RUBYVER = (RUBYVER_ARRAY[0] << 16 | RUBYVER_ARRAY[1] << 8 | RUBYVER_ARRAY[2]) +if RUBYVER < (2 << 16 | 1 << 8 | 0) + puts "catalog_memory requires Ruby version >= 2.1.0 to run" + exit(-1) +end + ObjectSpace.trace_object_allocations_start class Benchmarker include FileUtils + def initialize(target, size) @target = target @size = size @@first_counts = nil @@first_refs = nil @@count = 0 end def setup end def run(args=nil) unless @initialized require 'puppet' config = File.join(@target, 'puppet.conf') Puppet.initialize_settings(['--config', config]) @initialized = true end @@count += 1 env = Puppet.lookup(:environments).get('benchmarking') node = Puppet::Node.new("testing", :environment => env) # Mimic what apply does (or the benchmark will in part run for the *root* environment) Puppet.push_context({:current_environment => env},'current env for benchmark') Puppet::Resource::Catalog.indirection.find("testing", :use_node => node) Puppet.pop_context GC.start sleep(2) counted = ObjectSpace.count_objects({}) if @@first_counts && @@count == 10 diff = @@first_counts.merge(counted) {|k, base_v, new_v| new_v - base_v } puts "Count of objects TOTAL = #{diff[:TOTAL]}, FREE = #{diff[:FREE]}, T_OBJECT = #{diff[:T_OBJECT]}, T_CLASS = #{diff[:T_CLASS]}" changed = diff.reject {|k,v| v == 0} puts "Number of changed classes = #{changed}" GC.start # Find 
references to leaked Objects leaked_instances = ObjectSpace.each_object.reduce([]) {|x, o| x << o.object_id; x } - @@first_refs File.open("diff.json", "w") do |f| leaked_instances.each do |id| o = ObjectSpace._id2ref(id) f.write(ObjectSpace.dump(o)) if !o.nil? end end # Output information where bound objects where instantiated map_of_allocations = leaked_instances.reduce(Hash.new(0)) do |memo, x| o = ObjectSpace._id2ref(x) class_path = ObjectSpace.allocation_class_path(o) class_path = class_path.nil? ? ObjectSpace.allocation_sourcefile(o) : class_path if !class_path.nil? method = ObjectSpace.allocation_method_id(o) source_line = ObjectSpace.allocation_sourceline(o) memo["#{class_path}##{method}-#{source_line}"] += 1 end memo end map_of_allocations.sort_by {|k, v| v}.reverse.each {|k,v| puts "#{v} #{k}" } # Dump the heap for further analysis GC.start ObjectSpace.dump_all(output: File.open('heap.json','w')) elsif @@count == 1 # Set up baseline and output info for first run @@first_counts = counted @@first_refs = ObjectSpace.each_object.reduce([]) {|x, o| x << o.object_id; x } diff = @@first_counts puts "Count of objects TOTAL = #{diff[:TOTAL]}, FREE = #{diff[:FREE]}, T_OBJECT = #{diff[:T_OBJECT]}, T_CLASS = #{diff[:T_CLASS]}" end end def generate environment = File.join(@target, 'environments', 'benchmarking') templates = File.join('benchmarks', 'empty_catalog') mkdir_p(File.join(environment, 'modules')) mkdir_p(File.join(environment, 'manifests')) render(File.join(templates, 'site.pp.erb'), File.join(environment, 'manifests', 'site.pp'),{}) render(File.join(templates, 'puppet.conf.erb'), File.join(@target, 'puppet.conf'), :location => @target) end def render(erb_file, output_file, bindings) site = ERB.new(File.read(erb_file)) File.open(output_file, 'w') do |fh| fh.write(site.result(OpenStruct.new(bindings).instance_eval { binding })) end end end diff --git a/tasks/memwalk.rake b/tasks/memwalk.rake index 30a1ca237..49077d6c8 100644 --- a/tasks/memwalk.rake +++ 
# b/tasks/memwalk.rake @@ -1,194 +1,195 @@
# Walks the memory dumped into heap.json, and produces a graph of the memory dumped in diff.json
# If a single argument (a hex address to one object) is given, the graph is limited to this object and what references it
# The heap dumps should be in the format produced by Ruby ObjectSpace in Ruby version 2.1.0 or later.
#
# The command produces a .dot file that can be rendered with Graphviz dot into SVG. If a memwalk is performed for all
# objects in the diff.json, the output file name is memwalk.dot. If it is produced for a single address, the name of the
# output file is memwalk-<address>.dot
#
# The dot file can be rendered with something like: dot -Tsvg -omemwalk.svg memwalk.dot
#
desc "Process a diff.json of object ids, and a heap.json of a Ruby 2.1.0 ObjectSpace dump and produce a graph"
task :memwalk, [:id] do |t, args|
  puts "Memwalk"
  puts "Computing for #{args[:id] ? args[:id] : 'all'}"
  # The optional task argument is a hex address; nil means "graph everything in diff.json"
  @single_id = args[:id] ? args[:id].to_i(16) : nil

  require 'json'
  # Set is used for the printed/edges/walked bookkeeping below
  require 'set'

  TYPE = "type".freeze
  ROOT = "root".freeze
  ROOT_UC = "ROOT".freeze
  ADDR = "address".freeze
  NODE = "NODE".freeze
  STRING = "STRING".freeze
  DATA = "DATA".freeze
  HASH = "HASH".freeze
  ARRAY = "ARRAY".freeze
  OBJECT = "OBJECT".freeze
  CLASS = "CLASS".freeze

  # Index of the heap: integer address (or root name for ROOT entries) => parsed heap entry
  allocations = {}
  # Index of the objects to trace bindings for: integer address => parsed diff entry
  diff_index = {}

  puts "Reading data"
  begin
    puts "Reading diff"
    lines = 0
    # Read line by line; each line is one JSON document
    File.foreach("diff.json") do | line |
      lines += 1
      next if line.strip.empty?
      diff = JSON.parse(line)
      case diff[ TYPE ]
      when STRING, DATA, HASH, ARRAY
        # skip the strings
      else
        diff_index[ diff[ ADDR ].to_i(16) ] = diff
      end
    end
    puts "Read #{lines} number of diffs"
  rescue => e
    raise "ERROR READING DIFF at line #{lines} #{e.message[0, 200]}"
  end

  begin
    puts "Reading heap"
    lines = 0
    # Heap dumps can be large, so stream the file instead of loading all lines at once
    File.foreach("heap.json") do | line |
      lines += 1
      next if line.strip.empty?
      allocation = JSON.parse(line)
      case allocation[ TYPE ]
      when ROOT_UC
        # Graph for single id must include roots, as it may be a root that holds on to the reference
        # a global variable, thread, etc.
        #
        if @single_id
          # Roots are indexed by their name (a String) rather than by an address
          allocations[ allocation[ ROOT ] ] = allocation
        end
      when NODE
        # skip the NODE objects - they represent the loaded ruby code
      when STRING
        # skip all strings - they are everywhere
      else
        allocations[ allocation[ ADDR ].to_i(16) ] = allocation
      end
    end
    puts "Read #{lines} number of entries"
  rescue => e
    puts "ERROR READING HEAP #{e.message[0, 200]}"
    raise
  end
  @heap = allocations

  puts "Building reference index"
  # References is an index from a referenced object to an array with addresses to the objects that references it
  @references = Hash.new { |h, k| h[k] = [] }
  REFERENCES = "references".freeze
  allocations.each do |k,v|
    refs = v[ REFERENCES ]
    if refs.is_a?(Array)
      refs.each {|addr| @references[ addr.to_i(16) ] << k }
    end
  end

  @printed = Set.new()

  # Writes the dot node for the given heap entry, unless it has already been written
  def print_object(addr, entry)
    # only print each node once
    return unless @printed.add?(addr)
    name = node_name(addr)
    # The label is the type/class on one line and the address (or root name) on the next;
    # "\\n" emits a literal backslash-n which dot interprets as a line break in the label
    @output.write( "x#{name} [label=\"#{node_label(addr, entry)}\\n#{name}\"];\n")
  end

  # Returns the text to show for an entry: the class name for objects, "CLASS <name>" for
  # classes, and the entry type for everything else
  def node_label(addr, entry)
    case entry[ TYPE ]
    when OBJECT
      class_entry = @heap[ entry[ "class" ].to_i(16) ]
      # Fall back to the type if the class of the object is not in the heap index
      class_entry ? class_entry[ "name" ] : OBJECT
    when CLASS
      "CLASS #{entry[ "name"]}"
    else
      entry[ TYPE ]
    end
  end

  # Returns the identifier used in the dot file: root names as is, addresses in hex
  def node_name(addr)
    return addr if addr.is_a? String
    addr.to_s(16)
  end

  # Writes a dot edge from the referencing node to the referenced node
  def print_edge(from_addr, to_addr)
    @output.write("x#{node_name(from_addr)}->x#{node_name(to_addr)};\n")
  end

  # Graphs every object in the given diff index together with what references it
  def closure_and_edges(diff)
    edges = Set.new()
    walked = Set.new()
    puts "Number of diffs referenced = #{diff.count {|k,_| @references[k].is_a?(Array) && @references[k].size() > 0 }}"
    diff.each {|k,_| walk(k, edges, walked) }
    edges.each {|e| print_edge(*e) }
  end

  # Prints the object at addr (if it is in the heap index) and walks everything that references it
  def walk(addr, edges, walked)
    if !@heap[ addr ].nil?
      print_object(addr, @heap[addr])

      @references[ addr ].each do |r|
        walk_to_object(addr, r, edges, walked)
      end
    end
  end

  # Walks backwards from to_addr via cursor (something that references it) until an entry that
  # should be shown in the graph is found, records the edge, and continues from there.
  #
  # walked is nil when the caller found that the cursor was already in the walked set
  # (Set#add? returns nil in that case), which terminates the walk along this path.
  #
  def walk_to_object(to_addr, cursor, edges, walked)
    return unless walked
    type = @heap[ cursor ][ TYPE ]
    # if walked to an object, or everything if a single_id is the target
    # NOTE(review): classes are shown in both modes and only roots depend on single_id; this is
    # what the original unparenthesized `a || (b && c || d)` evaluated to - confirm it is intended.
    if type == OBJECT || type == CLASS || (@single_id && type == ROOT_UC)
      # and the edge is unique
      if edges.add?( [ cursor, to_addr ] )
        # then we may not have visited objects this objects is being referred from
        print_object(cursor, @heap[ cursor ])
        # Do not follow what binds a class
        if type != CLASS
          @references[ cursor ].each do |r|
            walk_to_object(cursor, r, edges, walked.add?(r))
            walked.delete(r)
          end
        end
      end
    else
      # continue search until Object
      @references[cursor].each do |r|
        walk_to_object(to_addr, r, edges, walked.add?(r))
      end
    end
  end

  # Graphs a single object together with what references it
  def single_closure_and_edges(the_target)
    edges = Set.new()
    walked = Set.new()
    walk(the_target, edges, walked)
    edges.each {|e| print_edge(*e) }
  end

  puts "creating graph"
  output_name = @single_id ? "memwalk-#{@single_id.to_s(16)}.dot" : "memwalk.dot"
  # Block form so that the file is closed also when the walk fails
  File.open(output_name, "w") do |out|
    @output = out
    out.write("digraph root {\n")
    if @single_id
      single_closure_and_edges(@single_id)
    else
      closure_and_edges(diff_index)
    end
    out.write("}\n")
  end
  puts "done"
end