Using Python's gc garbage collector interface and sys.getsizeof(), it's possible to dump all the Python objects and their sizes. Here's the code I'm using in production to troubleshoot a memory leak:
import gc
import os
import sys
import cPickle
import psutil

rss = psutil.Process(os.getpid()).get_memory_info().rss
# Dump variables if using more than 100MB of memory
if rss > 100 * 1024 * 1024:
    memory_dump()
    os.abort()

def memory_dump():
    dump = open("memory.pickle", 'wb')
    xs = []
    for obj in gc.get_objects():
        i = id(obj)
        size = sys.getsizeof(obj, 0)
        # Collecting referrers turned out to be too slow, so only referents are saved
        # referrers = [id(o) for o in gc.get_referrers(obj) if hasattr(o, '__class__')]
        referents = [id(o) for o in gc.get_referents(obj) if hasattr(o, '__class__')]
        if hasattr(obj, '__class__'):
            cls = str(obj.__class__)
            xs.append({'id': i, 'class': cls, 'size': size, 'referents': referents})
    cPickle.dump(xs, dump)
    dump.close()
Note that I'm only saving data from objects that have a __class__ attribute, because those are the only objects I care about. It should be possible to save the complete list of objects, but you'll need to take care when choosing which other attributes to record. Also, I found that getting the referrers for each object was extremely slow, so I opted to save only the referents. Anyway, after the crash, the resulting pickled data can be read back like this:
with open("memory.pickle", 'rb') as dump:
    objs = cPickle.load(dump)
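Once the list is loaded, a quick way to see where the memory went is to aggregate the recorded sizes by class. This is a minimal sketch, not part of the original dump code, that assumes the record format above ('class' and 'size' keys) and prints the ten classes with the largest combined footprint:

from collections import defaultdict

totals = defaultdict(int)   # class name -> total size in bytes
counts = defaultdict(int)   # class name -> number of instances
for o in objs:
    totals[o['class']] += o['size']
    counts[o['class']] += 1

# Classes with the largest combined size first
for cls in sorted(totals, key=totals.get, reverse=True)[:10]:
    print(cls, counts[cls], totals[cls])

Keep in mind that sys.getsizeof() only reports the size of each object itself, not of the objects it refers to, so the 'referents' ids are what let you follow ownership from there.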
Added 2017-11-15
The Python 3.6 version is here:
import gc
import sys
import _pickle as cPickle

def memory_dump():
    with open("memory.pickle", 'wb') as dump:
        xs = []
        for obj in gc.get_objects():
            i = id(obj)
            size = sys.getsizeof(obj, 0)
            # referrers = [id(o) for o in gc.get_referrers(obj) if hasattr(o, '__class__')]
            referents = [id(o) for o in gc.get_referents(obj) if hasattr(o, '__class__')]
            if hasattr(obj, '__class__'):
                cls = str(obj.__class__)
                xs.append({'id': i, 'class': cls, 'size': size, 'referents': referents})
        cPickle.dump(xs, dump)
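The trigger from the top of the post works unchanged with this version; the only caveat is that newer psutil releases expose memory_info() rather than the older get_memory_info(). A minimal sketch of the Python 3 watchdog, assuming memory_dump() above is available in the process being watched:

import os
import psutil

# Dump all tracked objects and abort once RSS crosses 100MB
rss = psutil.Process(os.getpid()).memory_info().rss
if rss > 100 * 1024 * 1024:
    memory_dump()
    os.abort()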