Browse Source

Remove the Dark Matter Detector (DMD) Memory debugger component.

This resolves #376.
pull/1/head
wolfbeast 4 years ago committed by Roy Tam
parent
commit
92fbd042f5
  1. 3
      application/palemoon/installer/package-manifest.in
  2. 3
      browser/installer/package-manifest.in
  3. 5
      build/moz.build
  4. 11
      config/config.mk
  5. 2122
      memory/replace/dmd/DMD.cpp
  6. 310
      memory/replace/dmd/DMD.h
  7. 2
      memory/replace/dmd/README
  8. 261
      memory/replace/dmd/block_analyzer.py
  9. 890
      memory/replace/dmd/dmd.py
  10. 39
      memory/replace/dmd/moz.build
  11. 379
      memory/replace/dmd/test/SmokeDMD.cpp
  12. 25
      memory/replace/dmd/test/basic-scan-32-expected.txt
  13. 25
      memory/replace/dmd/test/basic-scan-64-expected.txt
  14. 18
      memory/replace/dmd/test/complete-empty-cumulative-expected.txt
  15. 29
      memory/replace/dmd/test/complete-empty-dark-matter-expected.txt
  16. 18
      memory/replace/dmd/test/complete-empty-live-expected.txt
  17. 265
      memory/replace/dmd/test/complete-full1-dark-matter-expected.txt
  18. 127
      memory/replace/dmd/test/complete-full1-live-expected.txt
  19. 173
      memory/replace/dmd/test/complete-full2-cumulative-expected.txt
  20. 140
      memory/replace/dmd/test/complete-full2-dark-matter-expected.txt
  21. 56
      memory/replace/dmd/test/complete-partial-live-expected.txt
  22. 26
      memory/replace/dmd/test/moz.build
  23. 83
      memory/replace/dmd/test/scan-test.py
  24. 127
      memory/replace/dmd/test/script-diff-dark-matter-expected.txt
  25. 51
      memory/replace/dmd/test/script-diff-dark-matter1.json
  26. 51
      memory/replace/dmd/test/script-diff-dark-matter2.json
  27. 81
      memory/replace/dmd/test/script-diff-live-expected.txt
  28. 51
      memory/replace/dmd/test/script-diff-live1.json
  29. 53
      memory/replace/dmd/test/script-diff-live2.json
  30. 72
      memory/replace/dmd/test/script-ignore-alloc-fns-expected.txt
  31. 46
      memory/replace/dmd/test/script-ignore-alloc-fns.json
  32. 26
      memory/replace/dmd/test/script-max-frames-1-expected.txt
  33. 48
      memory/replace/dmd/test/script-max-frames-3-expected.txt
  34. 69
      memory/replace/dmd/test/script-max-frames-8-expected.txt
  35. 43
      memory/replace/dmd/test/script-max-frames.json
  36. 46
      memory/replace/dmd/test/script-sort-by-num-blocks-expected.txt
  37. 46
      memory/replace/dmd/test/script-sort-by-req-expected.txt
  38. 46
      memory/replace/dmd/test/script-sort-by-slop-expected.txt
  39. 46
      memory/replace/dmd/test/script-sort-by-usable-expected.txt
  40. BIN
      memory/replace/dmd/test/script-sort-by.json.gz
  41. 226
      memory/replace/dmd/test/test_dmd.js
  42. 35
      memory/replace/dmd/test/xpcshell.ini
  43. 2
      memory/replace/moz.build
  44. 3
      mobile/android/installer/package-manifest.in
  45. 15
      old-configure.in
  46. 52
      storage/mozStorageService.cpp
  47. 16
      toolkit/components/aboutmemory/content/aboutMemory.js
  48. 16
      toolkit/moz.configure
  49. 4
      toolkit/mozapps/installer/upload-files.mk
  50. 21
      xpcom/base/nsIMemoryReporter.idl
  51. 62
      xpcom/base/nsMemoryInfoDumper.cpp
  52. 9
      xpcom/base/nsMemoryInfoDumper.h
  53. 132
      xpcom/base/nsMemoryReporterManager.cpp
  54. 42
      xpcom/base/nsTraceRefcnt.cpp
  55. 3
      xpcom/tests/gtest/TestDeadlockDetectorScalability.cpp

3
application/palemoon/installer/package-manifest.in

@ -72,9 +72,6 @@
#ifndef MOZ_STATIC_JS
@BINPATH@/@DLL_PREFIX@mozjs@DLL_SUFFIX@
#endif
#ifdef MOZ_DMD
@BINPATH@/@DLL_PREFIX@dmd@DLL_SUFFIX@
#endif
#ifndef MOZ_SYSTEM_NSPR
#ifndef MOZ_FOLD_LIBS
@BINPATH@/@DLL_PREFIX@nspr4@DLL_SUFFIX@

3
browser/installer/package-manifest.in

@ -69,9 +69,6 @@
#ifndef MOZ_STATIC_JS
@BINPATH@/@DLL_PREFIX@mozjs@DLL_SUFFIX@
#endif
#ifdef MOZ_DMD
@BINPATH@/@DLL_PREFIX@dmd@DLL_SUFFIX@
#endif
#ifndef MOZ_SYSTEM_NSPR
#ifndef MOZ_FOLD_LIBS
@BINPATH@/@DLL_PREFIX@nspr4@DLL_SUFFIX@

5
build/moz.build

@ -52,16 +52,13 @@ if CONFIG['MOZ_PHOENIX']:
'compare-mozconfig/compare-mozconfigs-wrapper.py',
]
if CONFIG['ENABLE_TESTS'] or CONFIG['MOZ_DMD']:
if CONFIG['ENABLE_TESTS']:
FINAL_TARGET_FILES += ['/tools/rb/fix_stack_using_bpsyms.py']
if CONFIG['OS_ARCH'] == 'Darwin':
FINAL_TARGET_FILES += ['/tools/rb/fix_macosx_stack.py']
if CONFIG['OS_ARCH'] == 'Linux':
FINAL_TARGET_FILES += ['/tools/rb/fix_linux_stack.py']
if CONFIG['MOZ_DMD']:
FINAL_TARGET_FILES += ['/memory/replace/dmd/dmd.py']
# Put a useful .gdbinit in the bin directory, to be picked up automatically
# by GDB when we debug executables there.
FINAL_TARGET_FILES += ['/.gdbinit']

11
config/config.mk

@ -173,17 +173,6 @@ OS_LDFLAGS += -DEBUG -OPT:REF
endif
endif
#
# Handle DMD in optimized builds.
#
ifdef MOZ_DMD
ifdef HAVE_64BIT_BUILD
OS_LDFLAGS = -DEBUG -OPT:REF,ICF
else
OS_LDFLAGS = -DEBUG -OPT:REF
endif
endif # MOZ_DMD
endif # MOZ_DEBUG
endif # WINNT && !GNU_CC

2122
memory/replace/dmd/DMD.cpp

File diff suppressed because it is too large Load Diff

310
memory/replace/dmd/DMD.h

@ -1,310 +0,0 @@
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#ifndef DMD_h___
#define DMD_h___
#include <string.h>
#include <stdarg.h>
#include "mozilla/DebugOnly.h"
#include "mozilla/Move.h"
#include "mozilla/Types.h"
#include "mozilla/UniquePtr.h"
#include "replace_malloc_bridge.h"
namespace mozilla {
class JSONWriteFunc;
namespace dmd {
// Holds the sizes of various DMD data structures. Every counter is zero after
// construction and after Clear().
struct Sizes
{
  size_t mStackTracesUsed;
  size_t mStackTracesUnused;
  size_t mStackTraceTable;
  size_t mLiveBlockTable;
  size_t mDeadBlockTable;

  Sizes() { Clear(); }

  // Resets every counter to zero.
  void Clear()
  {
    mStackTracesUsed = 0;
    mStackTracesUnused = 0;
    mStackTraceTable = 0;
    mLiveBlockTable = 0;
    mDeadBlockTable = 0;
  }
};
// Interface of virtual DMD entry points. See further below for a description
// of each method. The DMDFuncs class should contain a virtual method for each
// of them (except IsRunning, which can be inferred from the DMDFuncs singleton
// existing).
struct DMDFuncs
{
virtual void Report(const void*);
virtual void ReportOnAlloc(const void*);
virtual void ClearReports();
virtual void Analyze(UniquePtr<JSONWriteFunc>);
virtual void SizeOf(Sizes*);
virtual void StatusMsg(const char*, va_list);
virtual void ResetEverything(const char*);
#ifndef REPLACE_MALLOC_IMPL
// We deliberately don't use ReplaceMalloc::GetDMDFuncs here, because if we
// did, the following would happen.
// - The code footprint of each call to Get() would be larger, as GetDMDFuncs
// ends up inlined.
// - When no replace-malloc library is loaded, the number of instructions
// executed is equivalent, but they don't necessarily fit in the same cache
// line.
// - When a non-DMD replace-malloc library is loaded, the overhead is
// higher because there is first a check for the replace malloc bridge
// and then for the DMDFuncs singleton.
// Initializing the DMDFuncs singleton on the first access makes the
// overhead even worse. Either Get() is inlined and massive, or it isn't
// and a simple value check becomes a function call.
// Returns the pointer cached in sSingleton.
static DMDFuncs* Get() { return sSingleton.Get(); }
private:
// Wrapper class keeping a pointer to the DMD functions. It is statically
// initialized because it needs to be set early enough.
// Debug builds also check that it's never accessed before the static
// initialization has actually occurred, which could be the case if some
// other static initializer ended up calling into DMD.
class Singleton
{
public:
// Caches the result of ReplaceMalloc::GetDMDFuncs() at construction time.
Singleton()
: mValue(ReplaceMalloc::GetDMDFuncs())
#ifdef DEBUG
, mInitialized(true)
#endif
{}
// Returns the cached pointer; asserts that the constructor has already run.
DMDFuncs* Get()
{
MOZ_ASSERT(mInitialized);
return mValue;
}
private:
DMDFuncs* mValue;
#ifdef DEBUG
// Set by the constructor; checked by Get().
bool mInitialized;
#endif
};
// This singleton pointer must be defined on the program side. In Gecko,
// this is done in xpcom/base/nsMemoryInfoDumper.cpp.
static /* DMDFuncs:: */Singleton sSingleton;
#endif
};
#ifndef REPLACE_MALLOC_IMPL
// Mark a heap block as reported by a memory reporter.
inline void
Report(const void* aPtr)
{
DMDFuncs* funcs = DMDFuncs::Get();
if (funcs) {
funcs->Report(aPtr);
}
}
// Mark a heap block as reported immediately on allocation.
inline void
ReportOnAlloc(const void* aPtr)
{
DMDFuncs* funcs = DMDFuncs::Get();
if (funcs) {
funcs->ReportOnAlloc(aPtr);
}
}
// Clears existing reportedness data from any prior runs of the memory
// reporters. The following sequence should be used.
// - ClearReports()
// - run the memory reporters
// - Analyze()
// This sequence avoids spurious twice-reported warnings.
inline void
ClearReports()
{
DMDFuncs* funcs = DMDFuncs::Get();
if (funcs) {
funcs->ClearReports();
}
}
// Determines which heap blocks have been reported, and dumps JSON output
// (via |aWriter|) describing the heap.
//
// The following sample output contains comments that explain the format and
// design choices. The output files can be quite large, so a number of
// decisions were made to minimize size, such as using short property names and
// omitting properties whenever possible.
//
// {
// // The version number of the format, which will be incremented each time
// // backwards-incompatible changes are made. A mandatory integer.
// //
// // Version history:
// // - 1: Bug 1044709
// // - 2: Bug 1094552
// // - 3: Bug 1100851
// // - 4: Bug 1121830
// // - 5: Bug 1253512
// "version": 5,
//
// // Information about how DMD was invoked. A mandatory object.
// "invocation": {
// // The contents of the $DMD environment variable. A string, or |null| if
// // $DMD is undefined.
// "dmdEnvVar": "--mode=dark-matter",
//
// // The profiling mode. A mandatory string taking one of the following
// // values: "live", "dark-matter", "cumulative", "scan".
// "mode": "dark-matter",
// },
//
// // Details of all analyzed heap blocks. A mandatory array.
// "blockList": [
// // An example of a heap block.
// {
// // Requested size, in bytes. This is a mandatory integer.
// "req": 3584,
//
// // Requested slop size, in bytes. This is mandatory if it is non-zero,
// // but omitted otherwise.
// "slop": 512,
//
// // The stack trace at which the block was allocated. An optional
// // string that indexes into the "traceTable" object. If omitted, no
// // allocation stack trace was recorded for the block.
// "alloc": "A",
//
// // One or more stack traces at which this heap block was reported by a
// // memory reporter. An optional array that will only be present in
// // "dark-matter" mode. The elements are strings that index into
// // the "traceTable" object.
// "reps": ["B"]
//
// // The number of heap blocks with exactly the above properties. This
// // is mandatory if it is greater than one, but omitted otherwise.
// // (Blocks with identical properties don't have to be aggregated via
// // this property, but it can greatly reduce output file size.)
// "num": 5,
//
// // The address of the block. This is mandatory in "scan" mode, but
// // omitted otherwise.
// "addr": "4e4e4e4e",
//
// // The contents of the block, read one word at a time. This is
// // mandatory in "scan" mode for blocks at least one word long, but
// // omitted otherwise.
// "contents": ["0", "6", "7f7f7f7f", "0"]
// }
// ],
//
// // The stack traces referenced by elements of the "blockList" array. This
// // could be an array, but making it an object makes it easier to see
// // which stacks correspond to which references in the "blockList" array.
// "traceTable": {
// // Each property corresponds to a stack trace mentioned in the "blockList"
// // array. Each element is an index into the "frameTable" object.
// "A": ["D", "E"],
// "B": ["F", "G"]
// },
//
// // The stack frames referenced by the "traceTable" object. The
// // descriptions can be quite long, so they are stored separately from the
// // "traceTable" object so that each one only has to be written once.
// // This could also be an array, but again, making it an object makes it
// // easier to see which frames correspond to which references in the
// // "traceTable" object.
// "frameTable": {
// // Each property key is a frame key mentioned in the "traceTable" object.
// // Each property value is a string containing a frame description. Each
// // frame description must be in a format recognized by the stack-fixing
// // scripts (e.g. fix_linux_stack.py), which require a frame number at
// // the start. Because each stack frame description in this table can
// // be shared between multiple stack traces, we use a dummy value of
// // #00. The proper frame number can be reconstructed later by scripts
// // that output stack traces in a conventional non-shared format.
// "D": "#00: foo (Foo.cpp:123)",
// "E": "#00: bar (Bar.cpp:234)",
// "F": "#00: baz (Baz.cpp:345)",
// "G": "#00: quux (Quux.cpp:456)"
// }
// }
//
// Implementation note: normally, this function wouldn't be templated, but in
// that case, the function is compiled, which makes the destructor for the
// UniquePtr fire up, and that needs JSONWriteFunc to be fully defined. That,
// in turn, requires to include JSONWriter.h, which includes
// double-conversion.h, which ends up breaking various things built with
// -Werror for various reasons.
//
template <typename JSONWriteFunc>
inline void
Analyze(UniquePtr<JSONWriteFunc> aWriteFunc)
{
  DMDFuncs* dmd = DMDFuncs::Get();
  if (!dmd) {
    // Nothing to forward to; |aWriteFunc| is released when it goes out of
    // scope.
    return;
  }
  // Ownership of the writer moves to the DMDFuncs implementation.
  dmd->Analyze(Move(aWriteFunc));
}
// Fills |aSizes| with the size of various data structures. Used to implement
// a memory reporter for DMD. |aSizes| is left untouched when DMDFuncs::Get()
// returns null.
inline void
SizeOf(Sizes* aSizes)
{
  if (DMDFuncs* dmd = DMDFuncs::Get()) {
    dmd->SizeOf(aSizes);
  }
}
// Prints a status message prefixed with "DMD[<pid>]". Use sparingly.
inline void
StatusMsg(const char* aFmt, ...)
{
DMDFuncs* funcs = DMDFuncs::Get();
if (funcs) {
va_list ap;
va_start(ap, aFmt);
funcs->StatusMsg(aFmt, ap);
va_end(ap);
}
}
// Reports whether DMD is running, i.e. whether DMDFuncs::Get() returns a
// non-null pointer.
inline bool
IsRunning()
{
  return DMDFuncs::Get() != nullptr;
}
// Resets all DMD options and then sets new ones according to those specified
// in |aOptions|. Also clears all recorded data about allocations. Only used
// for testing purposes.
inline void
ResetEverything(const char* aOptions)
{
DMDFuncs* funcs = DMDFuncs::Get();
if (funcs) {
funcs->ResetEverything(aOptions);
}
}
#endif
} // namespace dmd
} // namespace mozilla
#endif /* DMD_h___ */

2
memory/replace/dmd/README

@ -1,2 +0,0 @@
This is DMD. See https://wiki.mozilla.org/Performance/MemShrink/DMD for
details on how to use it.

261
memory/replace/dmd/block_analyzer.py

@ -1,261 +0,0 @@
#!/usr/bin/python
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# From a scan mode DMD log, extract some information about a
# particular block, such as its allocation stack or which other blocks
# contain pointers to it. This can be useful when investigating leaks
# caused by unknown references to refcounted objects.
import json
import gzip
import sys
import argparse
import re
# The DMD output version this script handles.
outputVersion = 5
# If --ignore-alloc-fns is specified, stack frames containing functions that
# match these strings will be removed from the *start* of stack traces. (Once
# we hit a non-matching frame, any subsequent frames won't be removed even if
# they do match.)
allocatorFns = [
'malloc (',
'replace_malloc',
'replace_calloc',
'replace_realloc',
'replace_memalign',
'replace_posix_memalign',
'malloc_zone_malloc',
'moz_xmalloc',
'moz_xcalloc',
'moz_xrealloc',
'operator new(',
'operator new[](',
'g_malloc',
'g_slice_alloc',
'callocCanGC',
'reallocCanGC',
'vpx_malloc',
'vpx_calloc',
'vpx_realloc',
'vpx_memalign',
'js_malloc',
'js_calloc',
'js_realloc',
'pod_malloc',
'pod_calloc',
'pod_realloc',
'nsTArrayInfallibleAllocator::Malloc',
# This one necessary to fully filter some sequences of allocation functions
# that happen in practice. Note that ??? entries that follow non-allocation
# functions won't be stripped, as explained above.
'???',
]
####
# Command line arguments
def range_1_24(string):
    # argparse type callback: convert |string| to an int and reject values
    # outside 1..24 with an ArgumentTypeError.
    value = int(string)
    if not 1 <= value <= 24:
        raise argparse.ArgumentTypeError(
            '{:s} is not in the range 1..24'.format(string))
    return value
parser = argparse.ArgumentParser(description='Analyze the heap graph to find out things about an object. \
By default this prints out information about blocks that point to the given block.')
parser.add_argument('dmd_log_file_name',
help='clamped DMD log file name')
parser.add_argument('block',
help='address of the block of interest')
parser.add_argument('--info', dest='info', action='store_true',
default=False,
help='Print out information about the block.')
parser.add_argument('-sfl', '--max-stack-frame-length', type=int,
default=150,
help='Maximum number of characters to print from each stack frame')
parser.add_argument('-a', '--ignore-alloc-fns', action='store_true',
help='ignore allocation functions at the start of traces')
parser.add_argument('-f', '--max-frames', type=range_1_24,
help='maximum number of frames to consider in each trace')
parser.add_argument('-c', '--chain-reports', action='store_true',
help='if only one block is found to hold onto the object, report the next one, too')
####
class BlockData:
    # The fields of one "blockList" entry of a DMD log that this script uses:
    #   addr        - the block's address, kept as the hex string from the log
    #   contents    - the block's words, converted from hex strings to ints
    #   req_size    - the requested size in bytes
    #   alloc_stack - key of the allocation stack in the trace table, or None
    def __init__(self, json_block):
        self.addr = json_block['addr']

        # "contents" is omitted for blocks shorter than one word.
        self.contents = [int(c, 16) for c in json_block.get('contents', [])]

        self.req_size = json_block['req']

        # "alloc" is optional in the DMD output format: it is omitted when no
        # allocation stack trace was recorded. Store None in that case rather
        # than failing with a KeyError while loading the log.
        self.alloc_stack = json_block.get('alloc')
def print_trace_segment(args, stacks, block):
    # Write the allocation stack of |block| to stdout, one indented frame per
    # line. |stacks| is the (traceTable, frameTable) pair; each frame is cut
    # off at args.max_stack_frame_length characters of its description.
    (traceTable, frameTable) = stacks
    end = args.max_stack_frame_length

    for frameKey in traceTable[block.alloc_stack]:
        # The 5: is to remove the bogus leading "#00: " from the stack frame.
        # sys.stdout.write (as used by the rest of this script) replaces the
        # Python-2-only print statement; the two-space indent matches what
        # "print ' ', frame" used to emit.
        sys.stdout.write('  ' + frameTable[frameKey][5:end] + '\n')
def show_referrers(args, blocks, stacks, block):
    # Print every block whose contents hold a pointer to |block|, together
    # with the byte offsets of those pointers and the referrer's allocation
    # stack. With --chain-reports, keep following the chain for as long as
    # exactly one referrer is found.

    # Seed the visited set with the starting block so that a chain leading
    # back to it is detected right away instead of printing one referrer twice.
    visited = set([block])

    anyFound = False

    while True:
        referrers = {}

        for b, data in blocks.items():
            for which_edge, e in enumerate(data.contents):
                if e == block:
                    # 8 is the number of bytes per word on a 64-bit system.
                    # XXX This means that this output will be wrong for logs from 32-bit systems!
                    referrers.setdefault(b, []).append(8 * which_edge)
                    anyFound = True

        for r in referrers:
            sys.stdout.write('0x{} size = {} bytes'.format(blocks[r].addr, blocks[r].req_size))
            plural = 's' if len(referrers[r]) > 1 else ''
            sys.stdout.write(' at byte offset' + plural + ' ' + (', '.join(str(x) for x in referrers[r])))
            sys.stdout.write('\n')
            print_trace_segment(args, stacks, blocks[r])
            sys.stdout.write('\n')

        if not args.chain_reports:
            break

        if len(referrers) == 0:
            sys.stdout.write('Found no more referrers.\n')
            break
        if len(referrers) > 1:
            sys.stdout.write('Found too many referrers.\n')
            break

        sys.stdout.write('Chaining to next referrer.\n\n')
        # Exactly one referrer is left at this point; continue from it.
        block = next(iter(referrers))
        if block in visited:
            sys.stdout.write('Found a loop.\n')
            break
        visited.add(block)

    if not anyFound:
        sys.stdout.write('No referrers found.\n')
def show_block_info(args, blocks, stacks, block):
    # Print the address, requested size, contents and allocation stack of the
    # block stored under key |block| in |blocks|.
    info = blocks[block]

    sys.stdout.write('block: 0x{}\n'.format(info.addr))
    sys.stdout.write('requested size: {} bytes\n'.format(info.req_size))
    sys.stdout.write('\n')

    sys.stdout.write('block contents: ')
    for word in info.contents:
        # A zero word is printed as-is; any other word is looked up in
        # |blocks| and shown as that block's address.
        if word == 0:
            shown = '0'
        else:
            shown = blocks[word].addr
        sys.stdout.write('0x{} '.format(shown))
    sys.stdout.write('\n\n')

    sys.stdout.write('allocation stack:\n')
    print_trace_segment(args, stacks, info)
def cleanupTraceTable(args, frameTable, traceTable):
    # Tidy up |traceTable| in place according to the command line options:
    # strip leading allocator frames (--ignore-alloc-fns) and cap the number
    # of frames per trace (--max-frames).

    # Remove allocation functions at the start of traces.
    if args.ignore_alloc_fns:
        # Build a regexp that matches every function in allocatorFns.
        fn_re = re.compile('|'.join(map(re.escape, allocatorFns)))

        # Remove allocator fns from each stack trace. Only the leading run of
        # matching frames is dropped.
        for traceKey, frameKeys in list(traceTable.items()):
            numSkippedFrames = 0
            for frameKey in frameKeys:
                if fn_re.search(frameTable[frameKey]):
                    numSkippedFrames += 1
                else:
                    break
            if numSkippedFrames > 0:
                traceTable[traceKey] = frameKeys[numSkippedFrames:]

    # Trim the number of frames. --max-frames has no default, so it is None
    # when the option is absent; in that case nothing is trimmed. (Comparing
    # an int with None only happened to work on Python 2.)
    if args.max_frames is not None:
        for traceKey, frameKeys in list(traceTable.items()):
            if len(frameKeys) > args.max_frames:
                traceTable[traceKey] = frameKeys[:args.max_frames]
def loadGraph(options):
    # Read the DMD log named by options.dmd_log_file_name and return
    # (blocks, (traceTable, frameTable)), where |blocks| maps each block's
    # integer address to its BlockData.
    logName = options.dmd_log_file_name

    # Handle gzipped input if necessary.
    if logName.endswith('.gz'):
        opener = gzip.open
    else:
        opener = open

    with opener(logName, 'rb') as f:
        j = json.load(f)

    if j['version'] != outputVersion:
        raise Exception("'version' property isn't '{:d}'".format(outputVersion))

    # Not used below; the lookup raises KeyError if the property is absent.
    invocation = j['invocation']

    blocks = {}
    for entry in j['blockList']:
        blocks[int(entry['addr'], 16)] = BlockData(entry)

    traceTable = j['traceTable']
    frameTable = j['frameTable']

    cleanupTraceTable(options, frameTable, traceTable)

    return (blocks, (traceTable, frameTable))
def analyzeLogs():
    # Entry point: parse the command line, load the log, then either describe
    # the requested block (--info) or list the blocks that point to it.
    options = parser.parse_args()

    (blocks, stacks) = loadGraph(options)

    # The block of interest is given on the command line as a hex address.
    block = int(options.block, 16)

    if block not in blocks:
        # Written with sys.stdout.write (like the rest of this script) instead
        # of Python-2-only print statements; the emitted text is unchanged.
        sys.stdout.write('Object {} not found in traces.\n'.format(block))
        sys.stdout.write('It could still be the target of some nodes.\n')
        return

    if options.info:
        show_block_info(options, blocks, stacks, block)
        return

    show_referrers(options, blocks, stacks, block)


if __name__ == "__main__":
    analyzeLogs()

890
memory/replace/dmd/dmd.py

@ -1,890 +0,0 @@
#! /usr/bin/env python
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
'''This script analyzes a JSON file emitted by DMD.'''
from __future__ import print_function, division
import argparse
import collections
import gzip
import json
import os
import platform
import re
import shutil
import sys
import tempfile
from bisect import bisect_right
# The DMD output version this script handles.
outputVersion = 5
# If --ignore-alloc-fns is specified, stack frames containing functions that
# match these strings will be removed from the *start* of stack traces. (Once
# we hit a non-matching frame, any subsequent frames won't be removed even if
# they do match.)
allocatorFns = [
# Matches malloc, replace_malloc, moz_xmalloc, vpx_malloc, js_malloc, pod_malloc, malloc_zone_*, g_malloc.
'malloc',
# Matches calloc, replace_calloc, moz_xcalloc, vpx_calloc, js_calloc, pod_calloc, malloc_zone_calloc, pod_callocCanGC.
'calloc',
# Matches realloc, replace_realloc, moz_xrealloc, vpx_realloc, js_realloc, pod_realloc, pod_reallocCanGC.
'realloc',
# Matches memalign, posix_memalign, replace_memalign, replace_posix_memalign, moz_xmemalign, moz_xposix_memalign, vpx_memalign, malloc_zone_memalign.
'memalign',
'operator new(',
'operator new[](',
'g_slice_alloc',
# This one necessary to fully filter some sequences of allocation functions
# that happen in practice. Note that ??? entries that follow non-allocation
# functions won't be stripped, as explained above.
'???',
]
class Record(object):
    '''Aggregates heap blocks that have identical stack traces. A record can
    also represent the difference between two records.'''

    def __init__(self):
        self.numBlocks = 0
        self.reqSize = 0
        self.slopSize = 0
        self.usableSize = 0
        self.allocatedAtDesc = None
        self.reportedAtDescs = []
        # Maps a usable size to the number of blocks of that size.
        self.usableSizes = collections.defaultdict(int)

    def isZero(self, args):
        '''Return True if every counter is zero and no usable sizes are
        recorded. |args| is not used.'''
        counters = (self.numBlocks, self.reqSize, self.slopSize,
                    self.usableSize, len(self.usableSizes))
        return all(counter == 0 for counter in counters)

    def negate(self):
        '''Flip the sign of every counter and of every usable-size key.'''
        self.numBlocks = -self.numBlocks
        self.reqSize = -self.reqSize
        self.slopSize = -self.slopSize
        self.usableSize = -self.usableSize

        flipped = collections.defaultdict(int)
        for size, count in self.usableSizes.items():
            flipped[-size] = count
        self.usableSizes = flipped

    def subtract(self, r):
        '''Subtract record |r| from this record in place. Sizes present in
        both records are removed from |r.usableSizes| along the way.'''
        # We should only be calling this on records with matching stack traces.
        # Check this.
        assert self.allocatedAtDesc == r.allocatedAtDesc
        assert self.reportedAtDescs == r.reportedAtDescs

        self.numBlocks -= r.numBlocks
        self.reqSize -= r.reqSize
        self.slopSize -= r.slopSize
        self.usableSize -= r.usableSize

        mine = self.usableSizes
        theirs = r.usableSizes
        diff = collections.defaultdict(int)

        for size in mine:
            myCount = mine[size]
            if size not in theirs:
                diff[size] = myCount
                continue
            # Consume the matching entry of |r| so that only the sizes unique
            # to |r| are left for the second loop.
            delta = myCount - theirs.pop(size)
            if delta > 0:
                diff[size] = delta
            elif delta < 0:
                # A deficit is stored under the negated size.
                diff[-size] = -delta

        for size in theirs:
            diff[-size] = theirs[size]

        self.usableSizes = diff

    @staticmethod
    def cmpByUsableSize(r1, r2):
        # Sort by usable size, then by req size.
        byUsable = cmp(abs(r1.usableSize), abs(r2.usableSize))
        if byUsable:
            return byUsable
        return Record.cmpByReqSize(r1, r2)

    @staticmethod
    def cmpByReqSize(r1, r2):
        # Sort by req size.
        return cmp(abs(r1.reqSize), abs(r2.reqSize))

    @staticmethod
    def cmpBySlopSize(r1, r2):
        # Sort by slop size.
        return cmp(abs(r1.slopSize), abs(r2.slopSize))

    @staticmethod
    def cmpByNumBlocks(r1, r2):
        # Sort by block counts, then by usable size.
        byBlocks = cmp(abs(r1.numBlocks), abs(r2.numBlocks))
        if byBlocks:
            return byBlocks
        return Record.cmpByUsableSize(r1, r2)
sortByChoices = {
'usable': Record.cmpByUsableSize, # the default
'req': Record.cmpByReqSize,
'slop': Record.cmpBySlopSize,
'num-blocks': Record.cmpByNumBlocks,
}
def parseCommandLine():
    '''Build the argument parser for this script and return the parsed
    contents of sys.argv[1:].'''
    description = '''
Analyze heap data produced by DMD.
If one file is specified, analyze it; if two files are specified, analyze the
difference.
Input files can be gzipped.
Write to stdout unless -o/--output is specified.
Stack traces are fixed to show function names, filenames and line numbers
unless --no-fix-stacks is specified; stack fixing modifies the original file
and may take some time. If specified, the BREAKPAD_SYMBOLS_PATH environment
variable is used to find breakpad symbols for stack fixing.
'''

    # 24 is the maximum number of frames that DMD will produce.
    def range_1_24(string):
        value = int(string)
        if not 1 <= value <= 24:
            raise argparse.ArgumentTypeError(
                '{:s} is not in the range 1..24'.format(string))
        return value

    parser = argparse.ArgumentParser(description=description)

    # Output destination and record shaping.
    parser.add_argument('-o', '--output', type=argparse.FileType('w'),
                        help='output file; stdout if unspecified')
    parser.add_argument('-f', '--max-frames', type=range_1_24,
                        help='maximum number of frames to consider in each trace')
    parser.add_argument('-s', '--sort-by', choices=sortByChoices.keys(),
                        default='usable',
                        help='sort the records by a particular metric')
    parser.add_argument('-a', '--ignore-alloc-fns', action='store_true',
                        help='ignore allocation functions at the start of traces')

    # Stack fixing and scan-mode pointer clamping.
    parser.add_argument('--no-fix-stacks', action='store_true',
                        help='do not fix stacks')
    parser.add_argument('--clamp-contents', action='store_true',
                        help='for a scan mode log, clamp addresses to the start of live blocks, or zero if not in one')
    parser.add_argument('--print-clamp-stats', action='store_true',
                        help='print information about the results of pointer clamping; mostly useful for debugging clamping')
    parser.add_argument('--filter-stacks-for-testing', action='store_true',
                        help='filter stack traces; only useful for testing purposes')

    # Positional inputs: one file to analyze, or two files to diff.
    parser.add_argument('input_file',
                        help='a file produced by DMD')
    parser.add_argument('input_file2', nargs='?',
                        help='a file produced by DMD; if present, it is diff\'d with input_file')

    return parser.parse_args(sys.argv[1:])
# Fix stacks if necessary: first write the output to a tempfile, then replace
# the original file with it.
#
# |inputFilename| is rewritten in place, |isZipped| says whether it is
# gzip-compressed, and |opener| is the matching open function (gzip.open or
# open). Nothing is done when no stack-fixing script applies to the platform.
def fixStackTraces(inputFilename, isZipped, opener):
    # This append() call is needed to make the import statements work when this
    # script is installed as a symlink.
    sys.path.append(os.path.dirname(__file__))

    bpsyms = os.environ.get('BREAKPAD_SYMBOLS_PATH', None)
    sysname = platform.system()
    if bpsyms and os.path.exists(bpsyms):
        import fix_stack_using_bpsyms as fixModule
        fix = lambda line: fixModule.fixSymbols(line, bpsyms)
    elif sysname == 'Linux':
        import fix_linux_stack as fixModule
        fix = lambda line: fixModule.fixSymbols(line)
    elif sysname == 'Darwin':
        import fix_macosx_stack as fixModule
        fix = lambda line: fixModule.fixSymbols(line)
    else:
        fix = None  # there is no fix script for Windows

    if not fix:
        return

    # Fix stacks, writing output to a temporary file, and then
    # overwrite the original file.
    rawTmpFile = tempfile.NamedTemporaryFile(delete=False)

    # If the input is gzipped, then the output (written initially to
    # |rawTmpFile|) should be gzipped as well.
    #
    # And we want to set its pre-gzipped filename to '' rather than the
    # name of the temporary file, so that programs like the Unix 'file'
    # utility don't say that it was called 'tmp6ozTxE' (or something like
    # that) before it was zipped. So that explains the |filename=''|
    # parameter.
    #
    # The gzip wrapper has no usable |name|, so remember the temporary file's
    # name now in order to move it at the end.
    tmpFilename = rawTmpFile.name
    if isZipped:
        tmpFile = gzip.GzipFile(filename='', fileobj=rawTmpFile)
    else:
        tmpFile = rawTmpFile

    try:
        with opener(inputFilename, 'rb') as inputFile:
            for line in inputFile:
                tmpFile.write(fix(line))
    finally:
        tmpFile.close()
        # Closing a GzipFile does not close the file object it wraps, so
        # close the underlying temporary file explicitly. Otherwise buffered
        # data may still be unwritten when the file is moved.
        if tmpFile is not rawTmpFile:
            rawTmpFile.close()

    shutil.move(tmpFilename, inputFilename)
def getDigestFromFile(args, inputFile):
    '''Read one DMD output file and aggregate its blocks into a "digest".

    Arguments:
      args      -- parsed command-line options. The attributes read here are
                   no_fix_stacks, clamp_contents, ignore_alloc_fns, max_frames
                   and filter_stacks_for_testing.
      inputFile -- path of the DMD JSON file; a name ending in '.gz' is opened
                   with gzip.open, anything else with open.

    Returns a dict with the keys 'dmdEnvVar', 'mode', 'heapUsableSize' and
    'heapBlocks', plus either 'liveOrCumulativeRecords' (modes 'live' and
    'cumulative') or 'unreportedRecords', 'onceReportedRecords' and
    'twiceReportedRecords' (mode 'dark-matter'). Each of the '...Records'
    values maps a record key to a Record.

    Raises Exception if the file's 'version' differs from outputVersion, if
    its 'mode' is not recognised, or if a block has no 'req' property.
    '''
    # Handle gzipped input if necessary.
    isZipped = inputFile.endswith('.gz')
    opener = gzip.open if isZipped else open

    # Fix stack traces unless otherwise instructed.
    if not args.no_fix_stacks:
        fixStackTraces(inputFile, isZipped, opener)

    if args.clamp_contents:
        clampBlockList(args, inputFile, isZipped, opener)

    with opener(inputFile, 'rb') as f:
        j = json.load(f)

    if j['version'] != outputVersion:
        raise Exception("'version' property isn't '{:d}'".format(outputVersion))

    # Extract the main parts of the JSON object.
    invocation = j['invocation']
    dmdEnvVar = invocation['dmdEnvVar']
    mode = invocation['mode']
    blockList = j['blockList']
    traceTable = j['traceTable']
    frameTable = j['frameTable']

    # Insert the necessary entries for unrecorded stack traces. Note that 'ut'
    # and 'uf' will not overlap with any keys produced by DMD's
    # ToIdStringConverter::Base32() function.
    unrecordedTraceID = 'ut'
    unrecordedFrameID = 'uf'
    traceTable[unrecordedTraceID] = [unrecordedFrameID]
    frameTable[unrecordedFrameID] = \
        '#00: (no stack trace recorded due to --stacks=partial)'

    # For the purposes of this script, 'scan' behaves like 'live'.
    if mode == 'scan':
        mode = 'live'

    if mode not in ['live', 'dark-matter', 'cumulative']:
        raise Exception("bad 'mode' property: '{:s}'".format(mode))

    # Remove allocation functions at the start of traces.
    if args.ignore_alloc_fns:
        # Build a regexp that matches every function in allocatorFns.
        fn_re = re.compile('|'.join(re.escape(fn) for fn in allocatorFns))

        # Remove allocator fns from each stack trace. Only the leading run of
        # matching frames is dropped; the scan stops at the first frame that
        # does not match.
        for traceKey, frameKeys in list(traceTable.items()):
            numSkippedFrames = 0
            for frameKey in frameKeys:
                if fn_re.search(frameTable[frameKey]):
                    numSkippedFrames += 1
                else:
                    break
            if numSkippedFrames > 0:
                traceTable[traceKey] = frameKeys[numSkippedFrames:]

    # Trim the number of frames.
    for traceKey, frameKeys in list(traceTable.items()):
        if len(frameKeys) > args.max_frames:
            traceTable[traceKey] = frameKeys[:args.max_frames]

    def buildTraceDescription(traceTable, frameTable, traceKey):
        '''Return the printable lines (one per frame) for one stack trace.'''
        frameKeys = traceTable[traceKey]
        fmt = ' #{:02d}{:}'

        if args.filter_stacks_for_testing:
            # When running SmokeDMD.cpp, every stack trace should contain at
            # least one frame that contains 'DMD.cpp', from either |DMD.cpp| or
            # |SmokeDMD.cpp|. (Or 'dmd.cpp' on Windows.) If we see such a
            # frame, we replace the entire stack trace with a single,
            # predictable frame. There is too much variation in the stack
            # traces across different machines and platforms to do more precise
            # matching, but this level of matching will result in failure if
            # stack fixing fails completely.
            for frameKey in frameKeys:
                frameDesc = frameTable[frameKey]
                if 'DMD.cpp' in frameDesc or 'dmd.cpp' in frameDesc:
                    return [fmt.format(1, ': ... DMD.cpp ...')]

        # The frame number is always '#00' (see DMD.h for why), so we have to
        # replace that with the correct frame number. The [3:] slice drops
        # that leading '#00'.
        return [fmt.format(n, frameTable[frameKey][3:])
                for n, frameKey in enumerate(frameKeys, start=1)]

    # Aggregate blocks into records. All sufficiently similar blocks go into a
    # single record.
    if mode in ['live', 'cumulative']:
        liveOrCumulativeRecords = collections.defaultdict(Record)
    elif mode == 'dark-matter':
        unreportedRecords = collections.defaultdict(Record)
        onceReportedRecords = collections.defaultdict(Record)
        twiceReportedRecords = collections.defaultdict(Record)

    heapUsableSize = 0
    heapBlocks = 0

    # For each block we compute a |recordKey|, and all blocks with the same
    # |recordKey| are aggregated into a single record. The |recordKey| is
    # derived from the block's 'alloc' and 'reps' (if present) stack traces.
    #
    # We use frame descriptions (e.g. "#00: foo (X.cpp:99)") when comparing
    # traces for equality. We can't use trace keys or frame keys because
    # they're not comparable across different DMD runs (which is relevant
    # when doing diffs).
    #
    # Using frame descriptions also fits in with the stack trimming done for
    # --max-frames, which requires stack traces with common beginnings but
    # different endings to be considered equivalent. E.g. if we have distinct
    # traces T1:[A:D1,B:D2,C:D3] and T2:[X:D1,Y:D2,Z:D4] and we trim the final
    # frame of each they should be considered equivalent because the
    # untrimmed frame descriptions (D1 and D2) match.
    #
    # Having said all that, during a single invocation of dmd.py on a single
    # DMD file, for a single traceKey value the record key part will always
    # be the same, and we might encounter it 1000s of times. So we cache
    # prior results for speed.
    recordKeyPartCache = {}

    def makeRecordKeyPart(traceKey):
        '''Return the (cached) string of frame descriptions for |traceKey|.'''
        if traceKey in recordKeyPartCache:
            return recordKeyPartCache[traceKey]

        # A real list is built before str() so the key is the list's text on
        # every Python version, not the repr of a lazy map object.
        recordKeyPart = str([frameTable[frameKey]
                             for frameKey in traceTable[traceKey]])
        recordKeyPartCache[traceKey] = recordKeyPart
        return recordKeyPart

    for block in blockList:
        allocatedAtTraceKey = block.get('alloc', unrecordedTraceID)
        recordKey = makeRecordKeyPart(allocatedAtTraceKey)
        if mode in ['live', 'cumulative']:
            records = liveOrCumulativeRecords
        elif mode == 'dark-matter':
            if 'reps' in block:
                reportedAtTraceKeys = block['reps']
                for reportedAtTraceKey in reportedAtTraceKeys:
                    recordKey += makeRecordKeyPart(reportedAtTraceKey)
                if len(reportedAtTraceKeys) == 1:
                    records = onceReportedRecords
                else:
                    records = twiceReportedRecords
            else:
                records = unreportedRecords

        record = records[recordKey]

        if 'req' not in block:
            raise Exception("'req' property missing in block'")

        reqSize = block['req']
        slopSize = block.get('slop', 0)
        # A block entry without 'num' counts as a single block.
        num = block.get('num', 1)

        usableSize = reqSize + slopSize
        heapUsableSize += num * usableSize
        heapBlocks += num

        record.numBlocks += num
        record.reqSize += num * reqSize
        record.slopSize += num * slopSize
        record.usableSize += num * usableSize
        if record.allocatedAtDesc is None:
            record.allocatedAtDesc = \
                buildTraceDescription(traceTable, frameTable,
                                      allocatedAtTraceKey)

        if mode == 'dark-matter':
            # Only the first block that lands in a record fills in the
            # "reported at" descriptions.
            if 'reps' in block and record.reportedAtDescs == []:
                record.reportedAtDescs = \
                    [buildTraceDescription(traceTable, frameTable, k)
                     for k in reportedAtTraceKeys]
        record.usableSizes[usableSize] += num

    # All the processed data for a single DMD file is called a "digest".
    digest = {}
    digest['dmdEnvVar'] = dmdEnvVar
    digest['mode'] = mode
    digest['heapUsableSize'] = heapUsableSize
    digest['heapBlocks'] = heapBlocks
    if mode in ['live', 'cumulative']:
        digest['liveOrCumulativeRecords'] = liveOrCumulativeRecords
    elif mode == 'dark-matter':
        digest['unreportedRecords'] = unreportedRecords
        digest['onceReportedRecords'] = onceReportedRecords
        digest['twiceReportedRecords'] = twiceReportedRecords
    return digest
def diffRecords(args, records1, records2):
    '''Return a dict holding the per-key difference records2 - records1.

    Keys present in both inputs yield records2's record after
    subtract(records1's record), unless isZero(args) reports it as zero, in
    which case the key is dropped. Keys only in records1 yield that record
    after negate(). Keys only in records2 are carried over unchanged.

    The inputs are modified: shared keys are removed from records2, and the
    record objects themselves are updated in place.
    '''
    delta = {}

    for key in records1:
        older = records1[key]

        if key not in records2:
            # This record is present only in records1.
            older.negate()
            delta[key] = older
            continue

        # This record is present in both records1 and records2.
        newer = records2.pop(key)
        newer.subtract(older)
        if not newer.isZero(args):
            delta[key] = newer

    # Whatever is left in records2 was present only there.
    delta.update(records2)
    return delta
def diffDigests(args, d1, d2):
    '''Build the digest describing the change from digest |d1| to |d2|.

    Both digests must have the same 'mode'; otherwise an Exception is raised.
    In the result, 'dmdEnvVar' is the pair of both inputs' values, the heap
    totals are d2's minus d1's, and each record table for the mode is the
    result of diffRecords() on the two inputs' tables.
    '''
    mode = d1['mode']
    if mode != d2['mode']:
        raise Exception("the input files have different 'mode' properties")

    d3 = {
        'dmdEnvVar': (d1['dmdEnvVar'], d2['dmdEnvVar']),
        'mode': mode,
        'heapUsableSize': d2['heapUsableSize'] - d1['heapUsableSize'],
        'heapBlocks': d2['heapBlocks'] - d1['heapBlocks'],
    }

    # Which record tables exist depends on the mode.
    if mode in ['live', 'cumulative']:
        recordTables = ['liveOrCumulativeRecords']
    elif mode == 'dark-matter':
        recordTables = ['unreportedRecords',
                        'onceReportedRecords',
                        'twiceReportedRecords']
    else:
        recordTables = []

    for table in recordTables:
        d3[table] = diffRecords(args, d1[table], d2[table])

    return d3
def printDigest(args, digest):
    '''Write a textual report of |digest| to |args.output|.

    The report consists of the command line (with directories stripped), the
    invocation(s), one section per record table for the digest's mode, and a
    summary. |args.sort_by| selects the record ordering via sortByChoices.
    '''
    dmdEnvVar = digest['dmdEnvVar']
    mode = digest['mode']
    heapUsableSize = digest['heapUsableSize']
    heapBlocks = digest['heapBlocks']

    isLiveOrCumulative = mode in ['live', 'cumulative']
    isDarkMatter = (not isLiveOrCumulative) and mode == 'dark-matter'

    if isLiveOrCumulative:
        liveOrCumulativeRecords = digest['liveOrCumulativeRecords']
    elif isDarkMatter:
        unreportedRecords = digest['unreportedRecords']
        onceReportedRecords = digest['onceReportedRecords']
        twiceReportedRecords = digest['twiceReportedRecords']

    separator = '#' + '-' * 65 + '\n'

    def number(n):
        '''Format a number with comma as a separator.'''
        return '{:,d}'.format(n)

    def perc(m, n):
        '''Return m as a percentage of n, or 0 when n is 0.'''
        if n == 0:
            return 0
        return 100 * m / n

    def plural(n):
        '''Return the plural suffix to use for a count of n.'''
        if n == 1:
            return ''
        return 's'

    # Prints to stdout, or to file if -o/--output was specified.
    def out(*arguments, **kwargs):
        print(*arguments, file=args.output, **kwargs)

    def printStack(traceDesc):
        for line in traceDesc:
            out(line)

    def printRecords(recordKind, records, heapUsableSize):
        '''Print one record table; return its (usable size, block count).'''
        kindTitle = recordKind.capitalize()
        out(separator)
        numRecords = len(records)
        ordering = sortByChoices[args.sort_by]
        ranked = sorted(records.values(), cmp=ordering, reverse=True)
        maxRecord = 1000

        # First pass: totals over every record, printed or not.
        kindBlocks = sum(rec.numBlocks for rec in ranked)
        kindUsableSize = sum(rec.usableSize for rec in ranked)

        # Second pass: print.
        if numRecords == 0:
            out('# no {:} heap blocks\n'.format(recordKind))

        runningUsableSize = 0
        position = 0
        for rec in ranked:
            position += 1

            # Stop printing at the |maxRecord|th record.
            if position == maxRecord:
                out('# {:}: stopping after {:,d} heap block records\n'.
                    format(kindTitle, position))
                break

            runningUsableSize += rec.usableSize

            out(kindTitle + ' {')
            out(' {:} block{:} in heap block record {:,d} of {:,d}'.
                format(number(rec.numBlocks), plural(rec.numBlocks),
                       position, numRecords))
            out(' {:} bytes ({:} requested / {:} slop)'.
                format(number(rec.usableSize), number(rec.reqSize),
                       number(rec.slopSize)))

            # Largest absolute block size first.
            sizeCounts = sorted(rec.usableSizes.items(),
                                key=lambda pair: abs(pair[0]),
                                reverse=True)

            isLoneBlock = len(sizeCounts) == 1 and sizeCounts[0][1] == 1
            if not isLoneBlock:
                if len(sizeCounts) == 0:
                    sizesText = '(no change)'
                else:
                    pieces = []
                    for blockSize, count in sizeCounts:
                        piece = '{:}'.format(number(blockSize))
                        if count > 1:
                            piece += ' x {:,d}'.format(count)
                        pieces.append(piece)
                    sizesText = '; '.join(pieces)
                out(' Individual block sizes: ' + sizesText)

            out(' {:4.2f}% of the heap ({:4.2f}% cumulative)'.
                format(perc(rec.usableSize, heapUsableSize),
                       perc(runningUsableSize, heapUsableSize)))
            if isDarkMatter:
                out(' {:4.2f}% of {:} ({:4.2f}% cumulative)'.
                    format(perc(rec.usableSize, kindUsableSize),
                           recordKind,
                           perc(runningUsableSize, kindUsableSize)))

            out(' Allocated at {')
            printStack(rec.allocatedAtDesc)
            out(' }')

            if isDarkMatter:
                for n, reportedAtDesc in enumerate(rec.reportedAtDescs):
                    again = '' if n == 0 else 'again '
                    out(' Reported {:}at {{'.format(again))
                    printStack(reportedAtDesc)
                    out(' }')
            out('}\n')

        return (kindUsableSize, kindBlocks)

    def printInvocation(n, dmdEnvVar, mode):
        out('Invocation{:} {{'.format(n))
        if dmdEnvVar is None:
            out(' $DMD is undefined')
        else:
            out(" $DMD = '" + dmdEnvVar + "'")
        out(" Mode = '" + mode + "'")
        out('}\n')

    # Print command line. Strip dirs so the output is deterministic, which is
    # needed for testing.
    out(separator, end='')
    commandLine = ' '.join(map(os.path.basename, sys.argv))
    out('# ' + commandLine + '\n')

    # Print invocation(s). A tuple holds the values of two input files.
    if type(dmdEnvVar) is tuple:
        printInvocation(' 1', dmdEnvVar[0], mode)
        printInvocation(' 2', dmdEnvVar[1], mode)
    else:
        printInvocation('', dmdEnvVar, mode)

    # Print records.
    if isLiveOrCumulative:
        liveOrCumulativeUsableSize, liveOrCumulativeBlocks = \
            printRecords(mode, liveOrCumulativeRecords, heapUsableSize)
    elif isDarkMatter:
        twiceReportedUsableSize, twiceReportedBlocks = \
            printRecords('twice-reported', twiceReportedRecords,
                         heapUsableSize)
        unreportedUsableSize, unreportedBlocks = \
            printRecords('unreported', unreportedRecords, heapUsableSize)
        onceReportedUsableSize, onceReportedBlocks = \
            printRecords('once-reported', onceReportedRecords,
                         heapUsableSize)

    # Print summary.
    out(separator)
    out('Summary {')
    if isLiveOrCumulative:
        out(' Total: {:} bytes in {:} blocks'.
            format(number(liveOrCumulativeUsableSize),
                   number(liveOrCumulativeBlocks)))
    elif isDarkMatter:
        fmt = ' {:15} {:>12} bytes ({:6.2f}%) in {:>7} blocks ({:6.2f}%)'
        summaryRows = [
            ('Total:', heapUsableSize, 100, heapBlocks, 100),
            ('Unreported:',
             unreportedUsableSize,
             perc(unreportedUsableSize, heapUsableSize),
             unreportedBlocks,
             perc(unreportedBlocks, heapBlocks)),
            ('Once-reported:',
             onceReportedUsableSize,
             perc(onceReportedUsableSize, heapUsableSize),
             onceReportedBlocks,
             perc(onceReportedBlocks, heapBlocks)),
            ('Twice-reported:',
             twiceReportedUsableSize,
             perc(twiceReportedUsableSize, heapUsableSize),
             twiceReportedBlocks,
             perc(twiceReportedBlocks, heapBlocks)),
        ]
        for label, size, sizePerc, blocks, blocksPerc in summaryRows:
            out(fmt.format(label, number(size), sizePerc,
                           number(blocks), blocksPerc))
    out('}\n')
#############################
# Pretty printer for DMD JSON
#############################
def prettyPrintDmdJson(out, j):
out.write('{\n')
out.write(' "version": {0},\n'.format(j['version']))
out.write(' "invocation": ')
json.dump(j['invocation'], out, sort_keys=True)
out.write(',\n')
out.write(' "blockList": [')
first = True
for b in j['blockList']:
out.write('' if first else ',')
out.write('\n ')
json.dump(b, out, sort_keys=True)
first = False
out.write('\n ],\n')
out.write(' "traceTable": {')
first = True
for k, l in j['traceTable'].iteritems():
out.write('' if first else ',')
out.write('\n "{0}": {1}'.format(k, json.dumps(l)))
first = False
out.write('\n },\n')
out.write(' "frameTable": {')
first = True
for k, v in j['frameTable'].iteritems():
out.write('' if first else ',')<