I cut the run time by about 20% by tightening up the code a bit, and by nearly 70% (roughly a 3× speedup) by using a cache for the split strings. The cache only makes a difference if you use the same spec multiple times. Here are sample implementations and a profile script to test.
test.py
# Sample nested mapping used by the lookups below: each level holds exactly
# one key, and the innermost value is the string 'the end'.
mydict = {
    'first': {
        'second': {
            'third': {
                'fourth': 'the end',
            },
        },
    },
}
# original
def get_entry(dict, keyspec):
    """Return the value found by following a dotted key path.

    ``keyspec`` is split on ``'.'`` and each piece is used, in order, to
    subscript one level deeper, starting from ``dict``.

    Args:
        dict: The outermost mapping to start from.
        keyspec: Dot-separated path, e.g. ``'first.second.third.fourth'``.

    Returns:
        Whatever object is stored at the end of the path.

    Raises:
        Whatever the subscription raises when a path component is absent
        (``KeyError`` for plain dicts).
    """
    # NOTE: the first parameter shadows the builtin ``dict``; the name is
    # kept so existing keyword callers are not broken.
    keys = keyspec.split('.')
    # str.split always yields at least one element, so keys[0] is safe.
    result = dict[keys[0]]
    for key in keys[1:]:
        result = result[key]
    return result
# tighten up code
def get_entry_2(mydict, keyspec):
    """Return the value found by following a dotted key path.

    Tighter variant of the lookup: iterates directly over the split path
    instead of special-casing the first key and slicing the rest.

    Args:
        mydict: The outermost mapping to start from.
        keyspec: Dot-separated path, e.g. ``'first.second.third.fourth'``.

    Returns:
        Whatever object is stored at the end of the path.

    Raises:
        Whatever the subscription raises when a path component is absent
        (``KeyError`` for plain dicts).
    """
    # Rebinding the local name only moves our cursor one level deeper; the
    # caller's mapping is never modified.
    for key in keyspec.split('.'):
        mydict = mydict[key]
    return mydict
# use a cache
# Maps a keyspec string to its pre-split tuple of keys so that repeated
# lookups with the same spec skip the str.split call. The cache is never
# pruned: it grows by one entry per distinct keyspec seen.
cache = {}


def get_entry_3(mydict, keyspec):
    """Return the value found by following a dotted key path, caching the split.

    The split form of each ``keyspec`` is stored in the module-level
    ``cache`` the first time it is seen and reused on later calls.

    Args:
        mydict: The outermost mapping to start from.
        keyspec: Dot-separated path, e.g. ``'first.second.third.fourth'``.

    Returns:
        Whatever object is stored at the end of the path.

    Raises:
        Whatever the subscription raises when a path component is absent
        (``KeyError`` for plain dicts).
    """
    # No ``global`` statement needed: ``cache`` is only mutated, never rebound.
    try:
        spec = cache[keyspec]
    except KeyError:
        # First time this spec is seen: split once and remember the result.
        # The try body holds only the cache lookup, so a KeyError raised by
        # the data traversal below is never mistaken for a cache miss.
        spec = tuple(keyspec.split('.'))
        cache[keyspec] = spec
    for key in spec:
        mydict = mydict[key]
    return mydict
if __name__ == "__main__":
    # Quick manual check when run as a script: walk the full path through
    # the sample mapping and print the value found at the end.
    test = get_entry(mydict, 'first.second.third.fourth')
    print(test)
profile.py
from timeit import timeit
# Each row is (label to print, statement to time, setup code for timeit).
# The last row times the bare split for comparison with the lookups.
benchmarks = (
    ("original get_entry",
     "get_entry(mydict, 'first.second.third.fourth')",
     "from test import get_entry, mydict"),
    ("get_entry_2 with tighter code",
     "get_entry_2(mydict, 'first.second.third.fourth')",
     "from test import get_entry_2, mydict"),
    ("get_entry_3 with cache of split spec",
     "get_entry_3(mydict, 'first.second.third.fourth')",
     "from test import get_entry_3, mydict"),
    ("just splitting a spec",
     "x.split('.')",
     "x='first.second.third.fourth'"),
)

# Print each label followed by the elapsed seconds reported by timeit.
for label, statement, setup_code in benchmarks:
    print(label)
    print(timeit(statement, setup=setup_code))
The timings on my machine are:
original get_entry
4.148535753000033
get_entry_2 with tighter code
3.2986323120003362
get_entry_3 with cache of split spec
1.3073233439990872
just splitting a spec
1.0949148639992927
Notice that splitting the spec is a comparatively expensive operation for this function. That’s why caching helps.