Efficiently accessing arbitrarily deep dictionaries

Question

I got a 20% performance boost by tightening up the code a bit but a whopping 400% increase by using a cache for split strings. That only makes a difference if you use the same spec multiple times. Here are sample implementations and a profile script to test.

test.py

mydict = {
    'first': {
        'second': {
            'third': {
                'fourth': 'the end'
             }
         }
     }
}

# original
def get_entry(dict, keyspec):
    keys = keyspec.split('.')

    result = dict[keys[0]]
    for key in keys[1:]:
       result = result[key]

    return result

# tighten up code
def get_entry_2(mydict, keyspec):
    for key in keyspec.split('.'):
        mydict = mydict[key]
    return mydict

# use a cache
cache = {}
def get_entry_3(mydict, keyspec):
    global cache
    try:
        spec = cache[keyspec]
    except KeyError:
        spec = tuple(keyspec.split('.'))
        cache[keyspec] = spec

    for key in spec:
        mydict = mydict[key]
    return mydict

if __name__ == "__main__":
    test = get_entry(mydict, 'first.second.third.fourth')
    print(test)

profile.py

from timeit import timeit
print("original get_entry")
print(timeit("get_entry(mydict, 'first.second.third.fourth')",
    setup="from test import get_entry, mydict"))

print("get_entry_2 with tighter code")
print(timeit("get_entry_2(mydict, 'first.second.third.fourth')",
    setup="from test import get_entry_2, mydict"))

print("get_entry_3 with cache of split spec")
print(timeit("get_entry_3(mydict, 'first.second.third.fourth')",
    setup="from test import get_entry_3, mydict"))

print("just splitting a spec")
print(timeit("x.split('.')", setup="x='first.second.third.fourth'"))

The timing on my machine is

original get_entry
4.148535753000033
get_entry_2 with tighter code
3.2986323120003362
get_entry_3 with cache of split spec
1.3073233439990872
just splitting a spec
1.0949148639992927

Notice that splitting the spec is a comparatively expensive operation for this function. That’s why caching helps.

Leave a Comment Cancel reply