{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Experiment with python-chamelboots to create HTML.\n", "\n", "## Resources\n", "\n", "* [python-chamelboots](https://github.com/dm-wyncode/python-chamelboots)\n", "* [Bootstrap starter template](https://getbootstrap.com/docs/4.3/getting-started/introduction/)\n", "\n", "## Replicate an HTML document using chamelboots.\n", "\n", "### Specs\n", "\n", "Replace the `rel` and `integrity` attributes in the `link` tag and the `src` and `integrity` attributes in the `script` tag with different values without editing the `starter_html` string.\n", "\n", "The new result should be a list of strings that would replace a range of lines in `starter_html`." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from chamelboots.constants import HTML_PARSER, Join\n", "from chamelboots import ChameleonTemplate as CT\n", "from chamelboots import TalStatement as TS" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from functools import reduce\n", "import operator as op\n", "from pprint import pprint\n", "import itertools as it\n", "from subprocess import check_call\n", "import shlex\n", "from pathlib import Path\n", "import tempfile" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from lxml import etree\n", "from bs4 import BeautifulSoup\n", "from IPython.display import display, IFrame" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Bootstrap title\n", " \n", " \n", " \n", "
\n", "

\n", " Hello, world!\n", " \n", " I am a nested span.\n", " \n", "

\n", " \n", "
\n", " \n", "\n" ] } ], "source": [ "starter_html = \"\"\"\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Bootstrap title\n", " \n", " \n", "
\n", "

Hello, world!{nested_span}

\n", " {list_}\n", "
\n", " \n", "\"\"\".format( # add some extra HTML using chamelboots\n", " list_=CT(\n", " \"ul\", (TS(\"content\", \"structure content\"), TS(\"attributes\", \"attributes\"))\n", " ).render(\n", " attributes={\"class\": \"list-group\"},\n", " content=CT(\n", " \"li\",\n", " (TS(\"repeat\", \"item items\"), TS(\"attributes\", \"attributes\")),\n", " \"${item}\",\n", " ).render(\n", " items=(f\"foo item number {i}\" for i in range(10)),\n", " attributes={\"class\": \"list-group-item\"},\n", " ),\n", " ),\n", " nested_span=CT(\"span\", (), \"I am a nested span.\"),\n", ")\n", "print(BeautifulSoup(starter_html, \"html.parser\").prettify())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Upload `starter_html` to my static webserver to display in an IFrame" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [
 "def save_to_minio(text):\n",
 "    \"\"\"Upload ``text`` as a temporary ``.html`` file with ``mc cp`` and return its URL.\n",
 "\n",
 "    :text: HTML document to upload.\n",
 "    :returns: URL built from the temporary file's name.\n",
 "    :raises subprocess.CalledProcessError: if ``mc cp`` exits with a non-zero status.\n",
 "    \"\"\"\n",
 "    # mkstemp hands back an already-open descriptor; wrap it so it gets closed\n",
 "    # instead of leaking one descriptor per call.\n",
 "    descriptor, name = tempfile.mkstemp(suffix=\".html\")\n",
 "    tmpfile = Path(name)\n",
 "    with open(descriptor, \"w\") as handle:\n",
 "        handle.write(text)\n",
 "    url = f\"https://minio.apps.selfip.com/mymedia/html/{tmpfile.name}\"\n",
 "    try:\n",
 "        # An argument list keeps the path a single argument even if it contains spaces.\n",
 "        check_call([\"mc\", \"cp\", str(tmpfile), \"dokkuminio/mymedia/html/\"])\n",
 "    finally:\n",
 "        # The local copy is only needed for the upload itself.\n",
 "        tmpfile.unlink()\n",
 "    return url"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Display template HTML document." ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://minio.apps.selfip.com/mymedia/html/tmpixne_sks.html\n" ] }, { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "url = save_to_minio(starter_html)\n", "print(url)\n", "display(IFrame(src=url, width=\"auto\", height=500))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "tree = etree.fromstring(starter_html, HTML_PARSER)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Flat structure.\n", "\n", "Flat is better than nested. 
Without nesting it makes it difficult to reconstruct the original HTML." ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('html', (('lang', 'en'),), ''),\n", " ('head', (), ''),\n", " ('meta', (('charset', 'utf-8'),), ''),\n", " ('meta',\n", " (('name', 'viewport'),\n", " ('content', 'width=device-width, initial-scale=1, shrink-to-fit=no')),\n", " ''),\n", " ('link',\n", " (('rel', 'stylesheet'),\n", " ('href',\n", " 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css'),\n", " ('integrity',\n", " 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T'),\n", " ('crossorigin', 'anonymous')),\n", " ''),\n", " ('script',\n", " (('defer', 'defer'),\n", " ('src', 'https://code.jquery.com/jquery-3.3.1.slim.min.js'),\n", " ('integrity',\n", " 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo'),\n", " ('crossorigin', 'anonymous')),\n", " ''),\n", " ('script',\n", " (('defer', 'defer'),\n", " ('src',\n", " 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js'),\n", " ('integrity',\n", " 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1'),\n", " ('crossorigin', 'anonymous')),\n", " ''),\n", " ('script',\n", " (('defer', 'defer'),\n", " ('src',\n", " 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js'),\n", " ('integrity',\n", " 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM'),\n", " ('crossorigin', 'anonymous')),\n", " ''),\n", " ('title', (), 'Bootstrap title'),\n", " ('body', (), ''),\n", " ('div', (), ''),\n", " ('h1', (), 'Hello, world!'),\n", " ('span', (), 'I am a nested span.'),\n", " ('ul', (('class', 'list-group'),), ''),\n", " ('li', (('class', 'list-group-item'),), 'foo item number 0'),\n", " ('li', (('class', 'list-group-item'),), 'foo item number 1'),\n", " ('li', (('class', 'list-group-item'),), 'foo item number 2'),\n", " ('li', (('class', 
'list-group-item'),), 'foo item number 3'),\n", " ('li', (('class', 'list-group-item'),), 'foo item number 4'),\n", " ('li', (('class', 'list-group-item'),), 'foo item number 5'),\n", " ('li', (('class', 'list-group-item'),), 'foo item number 6'),\n", " ('li', (('class', 'list-group-item'),), 'foo item number 7'),\n", " ('li', (('class', 'list-group-item'),), 'foo item number 8'),\n", " ('li', (('class', 'list-group-item'),), 'foo item number 9')]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "groups = [\n", " (e.tag, tuple(e.attrib.items()), e.text.strip() if e.text is not None else \"\")\n", " for e in tree.iter()\n", " if isinstance(e.tag, str)\n", "]\n", "groups" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define some constants." ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "INNER_CONTENT, ATTRIBS, ATTRIBUTES, TAIL = (\n", " \"inner_content\",\n", " \"attribs\",\n", " \"attributes\",\n", " \"tail\",\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define functions to recursively walk the element tree and convert to nested dictionaries and lists." 
] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [
 "def dictdata(node):\n",
 "    \"\"\"Convert ``node`` and everything beneath it into nested dicts and lists.\n",
 "\n",
 "    :node: Root element of the tree to convert.\n",
 "    :returns: Single-key dict mapping ``node.tag`` to its INNER_CONTENT list,\n",
 "        its ATTRIBS and its TAIL.\n",
 "    \"\"\"\n",
 "    children = []\n",
 "    html_to_dict(node, children)\n",
 "    return {\n",
 "        node.tag: {INNER_CONTENT: children, ATTRIBS: node.attrib, TAIL: node.tail}\n",
 "    }\n",
 "\n",
 "\n",
 "def html_to_dict(node, res):\n",
 "    \"\"\"Append one dict per child of ``node`` (or ``node`` itself when childless) to ``res``.\n",
 "\n",
 "    An element with children stores the list of its converted children under\n",
 "    INNER_CONTENT (its own ``text`` is not recorded); a childless element\n",
 "    stores its ``text`` there instead.\n",
 "\n",
 "    :node: Element to convert.\n",
 "    :res: List that receives the converted entries; it is mutated in place.\n",
 "    :returns: Always ``None``.\n",
 "    \"\"\"\n",
 "    if not len(node):\n",
 "        # Leaf: nothing to recurse into, so record the element itself.\n",
 "        res.append(\n",
 "            {\n",
 "                node.tag: {\n",
 "                    INNER_CONTENT: node.text,\n",
 "                    ATTRIBUTES: node.attrib,\n",
 "                    TAIL: node.tail,\n",
 "                }\n",
 "            }\n",
 "        )\n",
 "        return None\n",
 "    for child in list(node):\n",
 "        converted = []\n",
 "        html_to_dict(child, converted)\n",
 "        if len(child):\n",
 "            entry = {\n",
 "                child.tag: {\n",
 "                    INNER_CONTENT: converted,\n",
 "                    ATTRIBUTES: child.attrib,\n",
 "                    TAIL: child.tail,\n",
 "                }\n",
 "            }\n",
 "        else:\n",
 "            # A childless child appended exactly one entry describing itself.\n",
 "            entry = converted[0]\n",
 "        res.append(entry)\n",
 "    return None"
 ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "data = dictdata(tree.getroottree().getroot())" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'html': {'inner_content': [{'head': {'inner_content': [{: {'inner_content': ' Required meta tags ',\n", " 'attributes': ,\n", " 'tail': '\\n '}},\n", " {'meta': {'inner_content': None,\n", " 'attributes': {'charset': 'utf-8'},\n", " 'tail': '\\n '}},\n", " {'meta': {'inner_content': None,\n", " 'attributes': {'name': 'viewport', 'content': 'width=device-width, initial-scale=1, shrink-to-fit=no'},\n", " 'tail': '\\n '}},\n", " {: {'inner_content': ' Bootstrap CSS ',\n", " 'attributes': ,\n", " 'tail': '\\n '}},\n", " {'link': {'inner_content': None,\n", " 'attributes': {'rel': 'stylesheet', 'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css', 'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T', 'crossorigin': 'anonymous'},\n", " 'tail': '\\n '}},\n", " {: 
{'inner_content': ' Optional JavaScript ',\n", " 'attributes': ,\n", " 'tail': '\\n '}},\n", " {: {'inner_content': ' jQuery first, then Popper.js, then Bootstrap JS ',\n", " 'attributes': ,\n", " 'tail': '\\n '}},\n", " {'script': {'inner_content': None,\n", " 'attributes': {'defer': 'defer', 'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js', 'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo', 'crossorigin': 'anonymous'},\n", " 'tail': '\\n '}},\n", " {'script': {'inner_content': None,\n", " 'attributes': {'defer': 'defer', 'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js', 'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1', 'crossorigin': 'anonymous'},\n", " 'tail': '\\n '}},\n", " {'script': {'inner_content': None,\n", " 'attributes': {'defer': 'defer', 'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js', 'integrity': 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM', 'crossorigin': 'anonymous'},\n", " 'tail': '\\n '}},\n", " {'title': {'inner_content': 'Bootstrap title',\n", " 'attributes': {},\n", " 'tail': '\\n '}}],\n", " 'attributes': {},\n", " 'tail': '\\n '}},\n", " {'body': {'inner_content': [{'div': {'inner_content': [{'h1': {'inner_content': [{'span': {'inner_content': 'I am a nested span.',\n", " 'attributes': {},\n", " 'tail': None}}],\n", " 'attributes': {},\n", " 'tail': '\\n '}},\n", " {'ul': {'inner_content': [{'li': {'inner_content': 'foo item number 0',\n", " 'attributes': {'class': 'list-group-item'},\n", " 'tail': '\\n'}},\n", " {'li': {'inner_content': 'foo item number 1',\n", " 'attributes': {'class': 'list-group-item'},\n", " 'tail': '\\n'}},\n", " {'li': {'inner_content': 'foo item number 2',\n", " 'attributes': {'class': 'list-group-item'},\n", " 'tail': '\\n'}},\n", " {'li': {'inner_content': 'foo item number 3',\n", " 'attributes': {'class': 'list-group-item'},\n", " 
'tail': '\\n'}},\n", " {'li': {'inner_content': 'foo item number 4',\n", " 'attributes': {'class': 'list-group-item'},\n", " 'tail': '\\n'}},\n", " {'li': {'inner_content': 'foo item number 5',\n", " 'attributes': {'class': 'list-group-item'},\n", " 'tail': '\\n'}},\n", " {'li': {'inner_content': 'foo item number 6',\n", " 'attributes': {'class': 'list-group-item'},\n", " 'tail': '\\n'}},\n", " {'li': {'inner_content': 'foo item number 7',\n", " 'attributes': {'class': 'list-group-item'},\n", " 'tail': '\\n'}},\n", " {'li': {'inner_content': 'foo item number 8',\n", " 'attributes': {'class': 'list-group-item'},\n", " 'tail': '\\n'}},\n", " {'li': {'inner_content': 'foo item number 9',\n", " 'attributes': {'class': 'list-group-item'},\n", " 'tail': None}}],\n", " 'attributes': {'class': 'list-group'},\n", " 'tail': '\\n '}}],\n", " 'attributes': {},\n", " 'tail': '\\n '}}],\n", " 'attributes': {},\n", " 'tail': '\\n'}}],\n", " 'attribs': {'lang': 'en'},\n", " 'tail': None}}" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Define functions for getting all the \"paths\" to item leaves in the nested dictionary and for getting the leaf using the path.\n", "\n", "See this solution to _[Access nested dictionary items via a list of keys?](https://stackoverflow.com/a/14692747/1913726)_ on Stack Overflow." 
] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [
 "def paths_in_data(data, parent=()):\n",
 "    \"\"\"Calculate the key/index path to every leaf of a nested collection.\n",
 "\n",
 "    :data: Nested structure of dicts, lists and tuples; any other value is a leaf.\n",
 "    :parent: Path accumulated so far (used by the recursion).\n",
 "    :returns: Tuple of paths, each a tuple of keys and/or indices usable with ``get_from``.\n",
 "    \"\"\"\n",
 "    if isinstance(data, dict):\n",
 "        children = data.items()\n",
 "    elif isinstance(data, (list, tuple)):\n",
 "        # enumerate gives every element its own position; data.index(v) returned the\n",
 "        # first *equal* element, so duplicate values all received the same path.\n",
 "        children = enumerate(data)\n",
 "    else:\n",
 "        return (parent,)\n",
 "    return tuple(\n",
 "        path\n",
 "        for key, value in children\n",
 "        for path in paths_in_data(value, parent + (key,))\n",
 "    )\n",
 "\n",
 "\n",
 "def get_from(data, path):\n",
 "    \"\"\"Get a leaf from iterable of keys and/or indices.\n",
 "\n",
 "    :data: Collection where nodes are either a dict or list.\n",
 "    :path: Collection of keys and/or indices leading to a leaf.\n",
 "    :returns: The object reached by indexing ``data`` with each item of ``path`` in turn.\n",
 "    \"\"\"\n",
 "    return reduce(op.getitem, path, data)"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get the items to change." ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "WANTED_TAGS = (\"link\", \"script\")\n", "paths_to_mutables = [\n", " item for item in paths_in_data(data) if any(tag in item for tag in WANTED_TAGS)\n", "]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Group the paths by HTML element" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "('html', 'inner_content', 0, 'head', 'inner_content', 4, 'link', 'inner_content')\n", "('html', 'inner_content', 0, 'head', 'inner_content', 4, 'link', 'attributes')\n", "('html', 'inner_content', 0, 'head', 'inner_content', 4, 'link', 'tail')\n", "('html', 'inner_content', 0, 'head', 'inner_content', 7, 'script', 'inner_content')\n", "('html', 'inner_content', 0, 'head', 'inner_content', 7, 'script', 'attributes')\n", "('html', 'inner_content', 0, 'head', 'inner_content', 7, 'script', 'tail')\n", "('html', 
'inner_content', 0, 'head', 'inner_content', 8, 'script', 'inner_content')\n", "('html', 'inner_content', 0, 'head', 'inner_content', 8, 'script', 'attributes')\n", "('html', 'inner_content', 0, 'head', 'inner_content', 8, 'script', 'tail')\n", "('html', 'inner_content', 0, 'head', 'inner_content', 9, 'script', 'inner_content')\n", "('html', 'inner_content', 0, 'head', 'inner_content', 9, 'script', 'attributes')\n", "('html', 'inner_content', 0, 'head', 'inner_content', 9, 'script', 'tail')\n" ] } ], "source": [ "TAG_INDEX = 5\n", "mutables = it.groupby(paths_to_mutables, key=op.itemgetter(TAG_INDEX))\n", "for key, group in mutables:\n", " for row in group:\n", " print(row)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[[{'rel': 'stylesheet', 'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css', 'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T', 'crossorigin': 'anonymous'},\n", " '\\n '],\n", " [{'defer': 'defer', 'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js', 'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo', 'crossorigin': 'anonymous'},\n", " '\\n '],\n", " [{'defer': 'defer', 'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js', 'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1', 'crossorigin': 'anonymous'},\n", " '\\n '],\n", " [{'defer': 'defer', 'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js', 'integrity': 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM', 'crossorigin': 'anonymous'},\n", " '\\n ']]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "items_to_edit = [\n", " [get_from(data, row) for row in group][1:] # attributes and (inner_content or tail)\n", " for key, group in it.groupby(paths_to_mutables, 
key=op.itemgetter(TAG_INDEX))\n", "]\n", "items_to_edit" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "INTEGRITY = \"integrity\"\n", "link_keys = (\"href\", \"rel\", INTEGRITY, \"crossorigin\")\n", "script_keys = (\"defer\", \"src\", *link_keys[link_keys.index(INTEGRITY):])\n", "TAIL_DEFAULT = \"\\n \"\n", "DEFER = \"defer\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Experiment with alternative links\n", "\n", "### Bootswatch css breaks basic Bootstrap view." ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((None,\n", " {'href': 'https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.3.1/css/bootstrap.min.css',\n", " 'rel': 'stylesheet',\n", " 'integrity': None,\n", " 'crossorigin': None},\n", " '\\n '),\n", " {'rel': 'stylesheet', 'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css', 'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T', 'crossorigin': 'anonymous'})" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "STYLESHEET = \"stylesheet\"\n",
 "# Each *_LINK_DATA constant is an (inner_content, attributes, tail) triple so that\n",
 "# any of them can be assigned to LINK_DATA and indexed the same way.\n",
 "BOOTSWATCH_LINK_DATA = (\n",
 "    None,\n",
 "    dict(\n",
 "        zip(\n",
 "            link_keys,\n",
 "            (\n",
 "                \"http://netdna.bootstrapcdn.com/bootswatch/4.3.1/cerulean/bootstrap.min.css\",\n",
 "                STYLESHEET,\n",
 "                None,\n",
 "                None,\n",
 "            ),\n",
 "        )\n",
 "    ),\n",
 "    TAIL_DEFAULT,\n",
 ")\n",
 "MY_LINK_DATA = (\n", " None,\n", " dict(\n", " zip(\n", " link_keys,\n", " (\n", " \"https://static.apps.selfip.com/bootstrap/4.3.1/css/boostrap.min.css\",\n", " STYLESHEET,\n", " None,\n", " None,\n", " ),\n", " )\n", " ),\n", " TAIL_DEFAULT,\n", ")\n", "ALTERNATE_LINK_DATA = (\n", " None,\n", " dict(\n", " zip(\n", " link_keys,\n", " (\n", " \"https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.3.1/css/bootstrap.min.css\",\n", " STYLESHEET,\n", " None,\n", " None,\n", " ),\n", " )\n", " ),\n", " 
TAIL_DEFAULT,\n", ")\n", "\n", "LINK_DATA = (\n", " None,\n", " items_to_edit[0][0],\n", " TAIL_DEFAULT,\n", ")\n", "LINK_DATA = ALTERNATE_LINK_DATA\n", "ALTERNATE_LINK_DATA, items_to_edit[0][0]" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "new_values = (\n", " (\"link\", LINK_DATA),\n", " *(\n", " (\"script\", [None, dict(zip(script_keys, values)), TAIL_DEFAULT])\n", " for values in (\n", " (\n", " DEFER,\n", " \"https://code.jquery.com/jquery-3.3.1.slim.min.js\",\n", " \"sha256-3edrmyuQ0w65f8gfBsqowzjJe2iM6n0nKciPUp8y+7E=\",\n", " \"anonymous\",\n", " ),\n", " (\n", " DEFER,\n", " \"https://unpkg.com/popper.js@1.14.7/dist/umd/popper.min.js\",\n", " None,\n", " None,\n", " ),\n", " (\n", " DEFER,\n", " \"https://ajax.aspnetcdn.com/ajax/bootstrap/4.3.1/bootstrap.min.js\",\n", " None,\n", " None,\n", " ),\n", " )\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(('link',\n", " [None,\n", " {'rel': 'stylesheet', 'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css', 'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T', 'crossorigin': 'anonymous'},\n", " '\\n ']),\n", " ('script',\n", " [None,\n", " {'defer': 'defer', 'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js', 'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo', 'crossorigin': 'anonymous'},\n", " '\\n ']),\n", " ('script',\n", " [None,\n", " {'defer': 'defer', 'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js', 'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1', 'crossorigin': 'anonymous'},\n", " '\\n ']),\n", " ('script',\n", " [None,\n", " {'defer': 'defer', 'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js', 'integrity': 
'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM', 'crossorigin': 'anonymous'},\n", " '\\n ']))" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "TAG_INDEX = 5\n", "grouped = (\n", " tuple(group)\n", " for key, group in it.groupby(paths_to_mutables, key=op.itemgetter(TAG_INDEX))\n", ")\n", "TAG_INDEX_ = 6\n", "values = tuple(\n", " (paths[0][TAG_INDEX_], [get_from(data, path) for path in paths])\n", " for paths in grouped\n", ")\n", "values" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['',\n", " '',\n", " '',\n", " '']" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "def render_parts(tagged_values):\n",
 "    \"\"\"Render one element string per ``(tag, value)`` pair.\n",
 "\n",
 "    :tagged_values: Iterable of ``(tag, value)`` pairs where ``value`` is indexable;\n",
 "        ``value[1]`` supplies the attributes and ``value[2]`` the template content.\n",
 "    :returns: List with the rendered result of each template, in input order.\n",
 "    \"\"\"\n",
 "    return [\n",
 "        CT(\n",
 "            tag=tag,\n",
 "            tal_statements=(TS(ATTRIBUTES, ATTRIBUTES),),\n",
 "            # Keyword name comes from the INNER_CONTENT constant, as before.\n",
 "            **{INNER_CONTENT: value[2]},\n",
 "        ).render(attributes=value[1])\n",
 "        for tag, value in tagged_values\n",
 "    ]\n",
 "\n",
 "\n",
 "previous_parts = render_parts(values)\n",
 "previous_parts"
 ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['',\n", " '',\n", " '',\n", " '']" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Same rendering as previous_parts, applied to the replacement values.\n",
 "new_parts = render_parts(new_values)\n",
 "new_parts"
 ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Get the lines from `starter_html` that need replacing" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(7, 10, 11, 12)" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "lines_to_replace = (\n", " (\n", " i,\n", " line\n", " if any(\n", " item.tag in WANTED_TAGS for 
item in tuple(element.iterdescendants())[-1:]\n", " )\n", " else None,\n", " )\n", " for i, line in enumerate(starter_html.splitlines())\n", " if (element := etree.fromstring(line, HTML_PARSER)) is not None\n", ")\n", "indices, _ = zip(*((i, _) for i, _ in lines_to_replace if _))\n", "indices" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " Bootstrap title\n", " \n", " \n", " \n", "
\n", "

\n", " Hello, world!\n", " \n", " I am a nested span.\n", " \n", "

\n", "
    \n", "
  • \n", " foo item number 0\n", "
  • \n", "
  • \n", " foo item number 1\n", "
  • \n", "
  • \n", " foo item number 2\n", "
  • \n", "
  • \n", " foo item number 3\n", "
  • \n", "
  • \n", " foo item number 4\n", "
  • \n", "
  • \n", " foo item number 5\n", "
  • \n", "
  • \n", " foo item number 6\n", "
  • \n", "
  • \n", " foo item number 7\n", "
  • \n", "
  • \n", " foo item number 8\n", "
  • \n", "
  • \n", " foo item number 9\n", "
  • \n", "
\n", "
\n", " \n", "\n" ] } ], "source": [ "new_parts_iter = iter(new_parts)\n", "new_html = Join.LINES(\n", " line if i not in indices else next(new_parts_iter)\n", " for i, line in enumerate(starter_html.splitlines())\n", ")\n", "print(BeautifulSoup(new_html, \"html.parser\").prettify())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Verify that `new_html` displays Bootstrap styling." ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "https://minio.apps.selfip.com/mymedia/html/tmp8rtlpbmx.html\n" ] } ], "source": [ "url = save_to_minio(new_html)\n", "print(url)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Display template HTML document that has new `link` and `script` attribute values.\n", "\n", "All values were programmatically replaced with the above code." ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(IFrame(src=url, width=\"auto\", height=500))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.0" }, "nikola": { "category": "", "date": "2019-11-17 20:17:02 UTC", "description": "", "link": "", "slug": "create-html-with-python-chamelboots-an-experiment", "tags": "", "title": "Create HTML with python-chamelboots: An Experiment", "type": "text" } }, "nbformat": 4, "nbformat_minor": 2 }