Building and Traversing Nested Dictionaries with Python

Building and traversing dict trees.

Resources

In [1]:
from collections import defaultdict
In [2]:
import dpath.util
In [3]:
from chamelboots.datautils import get_from
from chamelboots.constants import FAKE
In [4]:
def tree():
    """Return an autovivifying nested mapping.

    The result is a ``defaultdict`` whose default factory is ``tree`` itself,
    so looking up a missing key on it (or on any of its descendants) creates
    another such mapping on the fly.
    """
    node = defaultdict(tree)
    return node
In [5]:
def dicts(d):
    """Recursively convert a nested mapping into plain ``dict`` objects.

    Parameters
    ----------
    d : mapping or any other value
        Typically a ``defaultdict`` tree.  Anything exposing ``.items()`` is
        rebuilt as a plain ``dict``; every other value is treated as a leaf.

    Returns
    -------
    dict or the original leaf value
        A new plain ``dict`` mirroring ``d`` (the input is not modified), or
        ``d`` itself when it is not a mapping.

    Notes
    -----
    Mappings nested inside lists or tuples are not converted; such sequences
    are returned as-is.
    """
    # Leaves (strings, numbers, lists, None, ...) have no ``.items()``; return
    # them unchanged instead of raising AttributeError.
    if not hasattr(d, "items"):
        return d
    return {key: dicts(value) for key, value in d.items()}
In [6]:
def add(root, sequence):
    """Walk ``root`` by looking up each key of ``sequence`` in turn.

    Each lookup is performed on the result of the previous one.  When ``root``
    is an autovivifying mapping such as the one ``tree()`` returns, the
    lookups create any missing levels as a side effect.  Nothing is returned.
    """
    node = root
    for key in sequence:
        node = node[key]
In [7]:
# Bound method that joins an iterable of strings with "/" separators.
slash_join = "/".join
In [8]:
# Build an autovivifying tree, create the nested 'top' -> 'html' levels by
# walking the key path, then convert the result to plain dicts for display.
root = tree()
path = "top html".split()
add(root, path)
d = dicts(root)
d
Out[8]:
{'top': {'html': {}}}
In [9]:
# Store a mock document under the slash-joined 'top/html' path: `head` is a
# list of single-key dicts with empty values, `body` holds one 'div' whose
# value is a list of five {"p": <phrase>} dicts.
# NOTE(review): FAKE is presumably a Faker instance, so the phrases differ on
# every run unless it is seeded — confirm.
dpath.util.set(
    d,
    slash_join(path),
    dict(
        head=[{k: {}} for k in "meta meta script script link title".split()],
        # `_`: the counter is unused; only the number of paragraphs matters.
        body=[{k: [{"p": FAKE.catch_phrase()} for _ in range(5)]} for k in ("div",)],
    ),
)
d["top"]["html"].keys()
Out[9]:
dict_keys(['head', 'body'])

Stack Overflow: Loop through all nested dictionary values?

From the Stack Overflow post:

I'm trying to loop through a dictionary and print out all key value pairs where the value is not a nested dictionary. If the value is a dictionary I want to go into it and print out its key value pairs...etc.

In [10]:
import itertools as it
In [11]:
# Check whether the mapping returned by tree() counts as a dict instance,
# which is the type test paths_from relies on when deciding to descend.
dd = tree()
isinstance(dd, dict)
Out[11]:
True
In [12]:
def paths_from(d, paths=(), acc=None):
    """Collect the key/index path to the nodes of nested dict/list data :d:.

    Parameters
    ----------
    d : dict, list or tuple
        Root of the nested data.  The root itself gets no path.
    paths : tuple of tuples, optional
        Path segments already walked; they are flattened and prepended to
        every generated path.
    acc : list, optional
        List to append the generated paths to.  A new list is created when
        omitted.

    Returns
    -------
    list of tuple
        ``acc``, holding one flat tuple of keys / indices per visited node.
        Every dict value gets a path; values that are dicts, lists or tuples
        are additionally descended into.  Items of a list or tuple get a path
        too, except items that are themselves dicts: those contribute only
        the paths of their keys (e.g. ``('k', 0, 'p')`` but not ``('k', 0)``).

    Notes
    -----
    Traversal is depth-first.  Dict entries are visited in reverse insertion
    order and sequence items in index order.  A container that is its own
    ancestor (a reference cycle) is reported once and not descended into
    again.
    """
    sequences = (list, tuple)
    containers = (dict, *sequences)

    acc = [] if acc is None else acc
    prefix = tuple(it.chain.from_iterable(paths))

    # Every stack entry is (path, value, emit) and carries its own complete
    # path, so finishing one subtree can never leak a stale prefix into a
    # sibling.  An entry whose path is None is an "exit" marker: its value is
    # the id() of a container whose subtree has been fully processed.
    stack = [(prefix, d, False)]
    active = set()  # id()s of the containers on the current root-to-node chain
    while stack:
        path, value, emit = stack.pop()
        if path is None:
            active.discard(value)
            continue
        if emit:
            acc.append(path)
        if not isinstance(value, containers):
            continue  # leaf: nothing to descend into
        if id(value) in active:
            continue  # reference cycle: already being walked further up
        active.add(id(value))
        # Pushed before the children, hence popped only after all of them.
        stack.append((None, id(value), False))
        if isinstance(value, dict):
            stack.extend(((*path, key), child, True) for key, child in value.items())
        else:
            # Reversed so that index 0 is popped, and therefore walked, first.
            stack.extend(
                ((*path, index), value[index], not isinstance(value[index], dict))
                for index in reversed(range(len(value)))
            )
    return acc
In [13]:
# Slash-separated form of a path into `d`; the integer list indices are
# converted to strings so they can be joined with the key names.
KEY = slash_join((str(item) for item in ("top", "html", "body", 0, "div", 3, "p")))
KEY
Out[13]:
'top/html/body/0/div/3/p'
In [14]:
%%timeit

# Time the lookup of one nested value through dpath's slash-separated path.
dpath.util.get(d, KEY)
685 µs ± 11.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
In [15]:
%%timeit

# Time the same lookup through get_from with a tuple of keys and indices
# (recorded run: about 1.94 µs per loop versus about 685 µs for dpath).
get_from(d, ('top', 'html', 'body', 0, 'div', 3, 'p'))
1.94 µs ± 20 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)
In [16]:
from chamelboots.html.packages.bootstrap import starter_html
from chamelboots.html import get_html_as_data
from chamelboots.html.utils import prettify_html

# Convert the prettified starter HTML into nested data.
# NOTE(review): judging by the recorded output this is a dict/list structure
# keyed by tag name and built with lxml — confirm against get_html_as_data.
data = get_html_as_data(prettify_html(starter_html))
In [17]:
# Top-level keys of the converted document.
data.keys()
Out[17]:
dict_keys(['html'])
In [18]:
# Keys stored for the root 'html' entry.
data['html'].keys()
Out[18]:
dict_keys(['inner_content', 'attribs', 'tail'])
In [19]:
# Resolve every generated path back against the data and print the value
# found there.  This prints each visited node, including the large container
# values near the root, so the output is long.
for path in paths_from(data):
    print(get_from(data, path))
{'inner_content': [{'body': {'inner_content': [{'p': {'inner_content': 'tml>\n\n ', 'attributes': {}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Required meta tags ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'meta': {'inner_content': None, 'attributes': {'charset': 'utf-8'}, 'tail': '\n  '}}, {'meta': {'inner_content': None, 'attributes': {'content': 'width=device-width, initial-scale=1, shrink-to-fit=no', 'name': 'viewport'}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Bootstrap CSS ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'link': {'inner_content': None, 'attributes': {'crossorigin': 'anonymous', 'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css', 'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T', 'rel': 'stylesheet'}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Optional JavaScript ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' jQuery first, then Popper.js, then Bootstrap JS ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo', 'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js'}, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1', 'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js'}, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 
'defer': 'defer', 'integrity': 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM', 'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js'}, 'tail': '\n  '}}, {'title': {'inner_content': '\n   Bootstrap title\n  ', 'attributes': {}, 'tail': '\n \n \n  '}}, {'div': {'inner_content': [{'h1': {'inner_content': '\n    Hello, world!\n   ', 'attributes': {}, 'tail': '\n  '}}], 'attributes': {}, 'tail': '\n \n'}}], 'attributes': {}, 'tail': None}}], 'attribs': {}, 'tail': None}
None
{}
[{'body': {'inner_content': [{'p': {'inner_content': 'tml>\n\n ', 'attributes': {}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Required meta tags ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'meta': {'inner_content': None, 'attributes': {'charset': 'utf-8'}, 'tail': '\n  '}}, {'meta': {'inner_content': None, 'attributes': {'content': 'width=device-width, initial-scale=1, shrink-to-fit=no', 'name': 'viewport'}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Bootstrap CSS ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'link': {'inner_content': None, 'attributes': {'crossorigin': 'anonymous', 'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css', 'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T', 'rel': 'stylesheet'}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Optional JavaScript ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' jQuery first, then Popper.js, then Bootstrap JS ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo', 'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js'}, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1', 'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js'}, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 
'integrity': 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM', 'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js'}, 'tail': '\n  '}}, {'title': {'inner_content': '\n   Bootstrap title\n  ', 'attributes': {}, 'tail': '\n \n \n  '}}, {'div': {'inner_content': [{'h1': {'inner_content': '\n    Hello, world!\n   ', 'attributes': {}, 'tail': '\n  '}}], 'attributes': {}, 'tail': '\n \n'}}], 'attributes': {}, 'tail': None}}]
{'inner_content': [{'p': {'inner_content': 'tml>\n\n ', 'attributes': {}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Required meta tags ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'meta': {'inner_content': None, 'attributes': {'charset': 'utf-8'}, 'tail': '\n  '}}, {'meta': {'inner_content': None, 'attributes': {'content': 'width=device-width, initial-scale=1, shrink-to-fit=no', 'name': 'viewport'}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Bootstrap CSS ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'link': {'inner_content': None, 'attributes': {'crossorigin': 'anonymous', 'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css', 'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T', 'rel': 'stylesheet'}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Optional JavaScript ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' jQuery first, then Popper.js, then Bootstrap JS ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo', 'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js'}, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1', 'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js'}, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 
'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM', 'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js'}, 'tail': '\n  '}}, {'title': {'inner_content': '\n   Bootstrap title\n  ', 'attributes': {}, 'tail': '\n \n \n  '}}, {'div': {'inner_content': [{'h1': {'inner_content': '\n    Hello, world!\n   ', 'attributes': {}, 'tail': '\n  '}}], 'attributes': {}, 'tail': '\n \n'}}], 'attributes': {}, 'tail': None}
None
{}
[{'p': {'inner_content': 'tml>\n\n ', 'attributes': {}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Required meta tags ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'meta': {'inner_content': None, 'attributes': {'charset': 'utf-8'}, 'tail': '\n  '}}, {'meta': {'inner_content': None, 'attributes': {'content': 'width=device-width, initial-scale=1, shrink-to-fit=no', 'name': 'viewport'}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Bootstrap CSS ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'link': {'inner_content': None, 'attributes': {'crossorigin': 'anonymous', 'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css', 'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T', 'rel': 'stylesheet'}, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' Optional JavaScript ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {<cyfunction Comment at 0x7fe25816a870>: {'inner_content': ' jQuery first, then Popper.js, then Bootstrap JS ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo', 'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js'}, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1', 'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js'}, 'tail': '\n  '}}, {'script': {'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 
'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM', 'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js'}, 'tail': '\n  '}}, {'title': {'inner_content': '\n   Bootstrap title\n  ', 'attributes': {}, 'tail': '\n \n \n  '}}, {'div': {'inner_content': [{'h1': {'inner_content': '\n    Hello, world!\n   ', 'attributes': {}, 'tail': '\n  '}}], 'attributes': {}, 'tail': '\n \n'}}]
{'inner_content': 'tml>\n\n ', 'attributes': {}, 'tail': '\n  '}

  
{}
tml>

 
{'inner_content': ' Required meta tags ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}

  
<lxml.etree._ImmutableMapping object at 0x7fe258154b40>
 Required meta tags 
{'inner_content': None, 'attributes': {'charset': 'utf-8'}, 'tail': '\n  '}

  
{'charset': 'utf-8'}
None
{'inner_content': None, 'attributes': {'content': 'width=device-width, initial-scale=1, shrink-to-fit=no', 'name': 'viewport'}, 'tail': '\n  '}

  
{'content': 'width=device-width, initial-scale=1, shrink-to-fit=no', 'name': 'viewport'}
None
{'inner_content': ' Bootstrap CSS ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}

  
<lxml.etree._ImmutableMapping object at 0x7fe258154b40>
 Bootstrap CSS 
{'inner_content': None, 'attributes': {'crossorigin': 'anonymous', 'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css', 'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T', 'rel': 'stylesheet'}, 'tail': '\n  '}

  
{'crossorigin': 'anonymous', 'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css', 'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T', 'rel': 'stylesheet'}
None
{'inner_content': ' Optional JavaScript ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}

  
<lxml.etree._ImmutableMapping object at 0x7fe258154b40>
 Optional JavaScript 
{'inner_content': ' jQuery first, then Popper.js, then Bootstrap JS ', 'attributes': <lxml.etree._ImmutableMapping object at 0x7fe258154b40>, 'tail': '\n  '}

  
<lxml.etree._ImmutableMapping object at 0x7fe258154b40>
 jQuery first, then Popper.js, then Bootstrap JS 
{'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo', 'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js'}, 'tail': '\n  '}

  
{'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo', 'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js'}

  
{'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1', 'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js'}, 'tail': '\n  '}

  
{'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1', 'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js'}

  
{'inner_content': '\n  ', 'attributes': {'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM', 'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js'}, 'tail': '\n  '}

  
{'crossorigin': 'anonymous', 'defer': 'defer', 'integrity': 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM', 'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js'}

  
{'inner_content': '\n   Bootstrap title\n  ', 'attributes': {}, 'tail': '\n \n \n  '}

 
 
  
{}

   Bootstrap title
  
{'inner_content': [{'h1': {'inner_content': '\n    Hello, world!\n   ', 'attributes': {}, 'tail': '\n  '}}], 'attributes': {}, 'tail': '\n \n'}

 

{}
[{'h1': {'inner_content': '\n    Hello, world!\n   ', 'attributes': {}, 'tail': '\n  '}}]
{'inner_content': '\n    Hello, world!\n   ', 'attributes': {}, 'tail': '\n  '}

  
{}

    Hello, world!