Update the Metadata on Every Save in a Jupyter Notebook

I am using Jupyter notebooks to create blog posts in the Nikola static site generator.

I have built a workflow using Makefiles and a Python package called Watchdog.

When a Jupyter notebook is saved, a Watchdog monitor running in a tmux session triggers a build and a push to the server where this site is hosted. Another Watchdog monitor, also running in a tmux session, then automatically runs a Bash script that deploys the latest version of this blog using Dokku.

I want to use this same sort of method to monitor the ./posts directory in the Nikola working directory.

When a notebook is saved, the date in the metadata should be updated to the current date.

Get the notebook and update its metadata.

This code uses the notebook that is the source of this blog post.

In [1]:
import json
from pathlib import Path
from pprint import pprint
import operator as op
from functools import reduce
from time import sleep
from copy import deepcopy

from IPython.display import display, HTML

FILENAME = 'update-the-metadata-on-every-save-in-a-jupyter-notebook.ipynb'
this_notebook_path = Path(FILENAME)
assert this_notebook_path.exists(), 'File does not exist.'
# Key path to the Nikola block inside the notebook JSON. The chained
# assignment also binds METADATA and NIKOLA to the individual keys.
KEYS = METADATA, NIKOLA = ('metadata', 'nikola')
DATE = 'date'
ALL_KEYS = (*KEYS, DATE)
H2 = '<h2>{text}</h2>' # for notebook blog

# A Jupyter notebook is JSON. Notebook files are UTF-8 encoded, so decode
# explicitly instead of relying on the platform's default encoding.
this_notebook = json.loads(this_notebook_path.read_text(encoding='utf-8'))
display(HTML(H2.format(text="Jupyter notebook keys.")))
# A width no larger than the shortest key makes pprint put one key per line.
pprint(list(this_notebook), width=min(len(key) for key in this_notebook))
display(HTML(H2.format(text="Jupyter notebook metadata.")))
pprint(this_notebook[METADATA])

# Snapshot of the notebook as loaded, kept for comparison after it is modified.
prev_notebook = deepcopy(this_notebook)

def get_item_by(keys, dictionary):
    """Follow ``keys`` one after another into ``dictionary``.

    Returns the value reached after applying every key in order, or
    ``None`` as soon as a lookup raises ``KeyError``. With no keys at all
    the starting object itself is returned.
    """
    node = dictionary
    try:
        for key in keys:
            node = node[key]
    except KeyError:
        return None
    return node


# Only notebooks that already carry a non-empty Nikola metadata block are updated.
nikola_metadata = get_item_by(KEYS, this_notebook)
if nikola_metadata:
    from datetime import datetime
    from datetime import timezone

    # The date format has one-second resolution; wait so the new timestamp
    # is strictly later than one written just before.
    sleep(1) # let some time pass
    # Timezone-aware "now" in UTC (datetime.utcnow() is deprecated).
    nikola_metadata[DATE] = datetime.now(timezone.utc).strftime(
        '%Y-%m-%d %H:%M:%S %Z')
    this_notebook[METADATA][NIKOLA] = nikola_metadata
    # Write the notebook back and re-read it so the check below looks at
    # what is actually on disk.
    this_notebook_path.write_text(json.dumps(this_notebook), encoding='utf-8')
    this_notebook = json.loads(this_notebook_path.read_text(encoding='utf-8'))
    display(HTML(H2.format(text="Updated Jupyter notebook metadata.")))

    pprint(this_notebook[METADATA])
    previous_date = get_item_by(ALL_KEYS, prev_notebook)
    updated_date = get_item_by(ALL_KEYS, this_notebook)
    # Dates in this zero-padded format sort chronologically as strings.
    # The condition and the message must be separate operands of assert:
    # wrapping both in parentheses builds a tuple, which is always true.
    assert previous_date is None or previous_date < updated_date, "Date not updated"

Jupyter notebook keys.

['cells',
 'metadata',
 'nbformat',
 'nbformat_minor']

Jupyter notebook metadata.

{'kernelspec': {'display_name': 'Python 3',
                'language': 'python',
                'name': 'python3'},
 'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},
                   'file_extension': '.py',
                   'mimetype': 'text/x-python',
                   'name': 'python',
                   'nbconvert_exporter': 'python',
                   'pygments_lexer': 'ipython3',
                   'version': '3.8.0b4'},
 'nikola': {'category': '',
            'date': '2019-09-29 13:48:32 UTC',
            'description': '',
            'link': '',
            'slug': 'update-the-metadata-on-every-save-in-a-jupyter-notebook',
            'tags': '',
            'title': 'Update the Metadata on Every Save in a Jupyter Notebook',
            'type': 'text'}}

Updated Jupyter notebook metadata.

{'kernelspec': {'display_name': 'Python 3',
                'language': 'python',
                'name': 'python3'},
 'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},
                   'file_extension': '.py',
                   'mimetype': 'text/x-python',
                   'name': 'python',
                   'nbconvert_exporter': 'python',
                   'pygments_lexer': 'ipython3',
                   'version': '3.8.0b4'},
 'nikola': {'category': '',
            'date': '2019-09-29 14:07:13 UTC',
            'description': '',
            'link': '',
            'slug': 'update-the-metadata-on-every-save-in-a-jupyter-notebook',
            'tags': '',
            'title': 'Update the Metadata on Every Save in a Jupyter Notebook',
            'type': 'text'}}

Makefile contents

# Not referenced by the target shown below.
NAMESPACE=dmmmd
# Run recipes with bash rather than make's default /bin/sh.
SHELL := /bin/bash


# Watch the current directory tree for notebook changes and run
# ../update_meta.py on each changed file.
#   --patterns="*.ipynb"  only react to notebook files
#   --command=...         "$$" is make's escape for a literal "$", so watchmedo
#                         receives ${watch_src_path} and substitutes the path
#                         of the file that triggered the event
#   -w -W                 per watchmedo's help: wait for the command to finish
#                         and drop events that arrive while it is running
#                         (TODO confirm against the installed watchdog version)
#   --recursive .         watch the current directory and its subdirectories
# NOTE(review): make requires recipe lines to start with a tab; the leading
# spaces below may be an artifact of how this snippet was pasted -- confirm.
update_meta:
    ~/.virtualenvs/seven-notebooks/bin/watchmedo shell-command \
        --patterns="*.ipynb" \
        --command='clear && ../update_meta.py $${watch_src_path}' \
        -w -W \
        --recursive .

Execute `chmod 755 update_meta.py` so that the file is executable.

#!/usr/bin/env python
# contents of update_meta.py


import json
import operator as op
import sys
from copy import deepcopy
from datetime import datetime, timezone
from functools import reduce
from pathlib import Path
from pprint import pprint
from time import sleep

# Key path to the Nikola block inside the notebook JSON, plus its date field.
KEYS = METADATA, NIKOLA = ("metadata", "nikola")
DATE = "date"
ALL_KEYS = (*KEYS, DATE)

FORMAT = "%Y-%m-%d %H:%M:%S %Z"
# Infinite loop happens when the notebook is saved after the update.
# Don't update if it were just updated (less than WAITTIME seconds ago).
WAITTIME = 60


def get_item_by(keys, dictionary):
    """Return the value reached by following ``keys`` through ``dictionary``.

    Returns ``None`` when any key along the path is missing.
    """
    try:
        return reduce(op.getitem, keys, dictionary)
    except KeyError:
        return None


def seconds_since(date_text, now):
    """Return the number of seconds from ``date_text`` to ``now``.

    ``date_text`` must match ``FORMAT`` and is interpreted as UTC; ``now``
    must be a timezone-aware datetime. The result is negative when
    ``date_text`` lies in the future.

    Raises:
        ValueError: if ``date_text`` does not match ``FORMAT``.
    """
    # strptime's %Z yields a naive datetime, so attach UTC explicitly.
    then = datetime.strptime(date_text, FORMAT).replace(tzinfo=timezone.utc)
    # total_seconds() includes whole days; timedelta.seconds alone would
    # treat "N days and 30 seconds ago" as "30 seconds ago".
    return (now - then).total_seconds()


def main(argv):
    """Refresh the Nikola ``date`` in the notebook named by ``argv[1]``.

    Exits with status 0 when the file does not exist yet, when the date
    was written less than ``WAITTIME`` seconds ago (or lies in the future),
    and after a successful update. Notebooks without a non-empty Nikola
    metadata block are left untouched.
    """
    if len(argv) < 2:
        sys.exit("usage: update_meta.py NOTEBOOK_PATH")

    this_notebook_path = Path(argv[1])
    # Explicit check instead of assert, which is stripped under ``python -O``.
    if not this_notebook_path.exists():
        print("File does not exist (yet).")
        sys.exit(0)
    print(f"updating metadata\n {this_notebook_path}")

    # A Jupyter notebook is JSON (UTF-8 encoded).
    this_notebook = json.loads(this_notebook_path.read_text(encoding="utf-8"))
    # A width no larger than the shortest key makes pprint put one key per line.
    pprint(list(this_notebook), width=min(len(key) for key in this_notebook))
    pprint(this_notebook[METADATA])

    prev_notebook = deepcopy(this_notebook)

    nikola_metadata = get_item_by(KEYS, this_notebook)
    if nikola_metadata:
        now = datetime.now(timezone.utc)
        previous_date = nikola_metadata.get(DATE)
        # A missing date always gets written; an existing one is refreshed
        # only when it is at least WAITTIME seconds old.
        if previous_date is not None and seconds_since(previous_date, now) < WAITTIME:
            print("No updated needed.")
            sys.exit(0)

        nikola_metadata[DATE] = now.strftime(FORMAT)
        this_notebook[METADATA][NIKOLA] = nikola_metadata
        this_notebook_path.write_text(json.dumps(this_notebook), encoding="utf-8")
        this_notebook = json.loads(this_notebook_path.read_text(encoding="utf-8"))

        pprint(this_notebook[METADATA])
        previous_date = get_item_by(ALL_KEYS, prev_notebook)
        updated_date = get_item_by(ALL_KEYS, this_notebook)
        # Dates in this zero-padded format sort chronologically as strings.
        # Condition and message are separate operands: a parenthesised
        # (condition, message) tuple would always be true.
        assert previous_date is None or previous_date < updated_date, "Date not updated"
        sleep(5)  # Attempt to prevent multiple executions.
        sys.exit(0)


if __name__ == "__main__":
    main(sys.argv)