Skip to main content

Custom Sort Some Data

Example of a custom sort in Python

In [17]:
from random import shuffle
In [18]:
sort_order = ('meta', 'meta', 'link', 'script', 'script', 'script', 'title')
In [19]:
data = [
    *[
        {
            "link": {
                "integrity": "sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T",
                "href": "https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css",
            }
        },
        {
            "script": {
                "src": "https://code.jquery.com/jquery-3.3.1.slim.min.js",
                "integrity": "sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo",
            }
        },
        {
            "script": {
                "src": "https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js",
                "integrity": "sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1",
            }
        },
        {
            "script": {
                "src": "https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js",
                "integrity": "sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM",
            }
        },
    ],
    *[
        {key: value}
        for key, value in (
            zip(
                ["meta", "meta", "title"],
                [
                    {"charset": "utf-8"},
                    {
                        "name": "viewport",
                        "content": "width=device-width, initial-scale=1, shrink-to-fit=no",
                    },
                    {},
                ],
            )
        )
    ],
]
data_ = data.copy()
In [20]:
shuffle(data_)
In [21]:
assert not data == data_
In [22]:
data_
Out[22]:
[{'link': {'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T',
   'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css'}},
 {'script': {'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js',
   'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1'}},
 {'script': {'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js',
   'integrity': 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM'}},
 {'title': {}},
 {'meta': {'charset': 'utf-8'}},
 {'meta': {'name': 'viewport',
   'content': 'width=device-width, initial-scale=1, shrink-to-fit=no'}},
 {'script': {'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js',
   'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo'}}]
In [32]:
def key(item):
    key_, = item.keys()
    return sort_order.index(key_)
In [33]:
sorted(data_, key=key )
Out[33]:
[{'meta': {'charset': 'utf-8'}},
 {'meta': {'name': 'viewport',
   'content': 'width=device-width, initial-scale=1, shrink-to-fit=no'}},
 {'link': {'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T',
   'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css'}},
 {'script': {'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js',
   'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1'}},
 {'script': {'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js',
   'integrity': 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM'}},
 {'script': {'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js',
   'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo'}},
 {'title': {}}]
In [34]:
def reverse_key(item):
    key_, = item.keys()
    return -sort_order.index(key_)
In [35]:
sorted(data_, key=reverse_key )
Out[35]:
[{'title': {}},
 {'script': {'src': 'https://cdnjs.cloudflare.com/ajax/libs/popper.js/1.14.7/umd/popper.min.js',
   'integrity': 'sha384-UO2eT0CpHqdSJQ6hJty5KVphtPhzWj9WO1clHTMGa3JDZwrnQq4sF86dIHNDz0W1'}},
 {'script': {'src': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/js/bootstrap.min.js',
   'integrity': 'sha384-JjSmVgyd0p3pXB1rRibZUAYoIIy6OrQ6VrjIEaFf/nJGzIxFDsf4x0xIM+B07jRM'}},
 {'script': {'src': 'https://code.jquery.com/jquery-3.3.1.slim.min.js',
   'integrity': 'sha384-q8i/X+965DzO0rT7abK41JStQIAqVgRVzpbzo5smXKp4YfRvH+8abtTE1Pi6jizo'}},
 {'link': {'integrity': 'sha384-ggOyR0iXCbMQv3Xipma34MD+dH/1fQ784/j6cY/iJTQUOhcWr7x9JvoRxT2MZw1T',
   'href': 'https://stackpath.bootstrapcdn.com/bootstrap/4.3.1/css/bootstrap.min.css'}},
 {'meta': {'charset': 'utf-8'}},
 {'meta': {'name': 'viewport',
   'content': 'width=device-width, initial-scale=1, shrink-to-fit=no'}}]

Explore using the Python class "type" to generate Bootstrap components.

Extend the dict type to create classes that output Bootstrap components using Chameleon templates.

Resources

In [53]:
from abc import ABC, abstractmethod
In [54]:
from chameleon import PageTemplate
from IPython.display import display, HTML

Note: It's not certain to me that this is a use case for an ABC or not.

Left here in case there is an opportunity to make an abstractmethod.

I wasn't sure where this was going to go.

In [55]:
class _Component(ABC):
    """Use Chameleon templates to replace only the attributes and inner
    content of Bootstrap components that change."""

    template = ""
    attrib = dict(foo="bar")

    def __init__(self, **kwargs):
        """Set :self.__dict__: to self when dict is extended.
        This allows spreading :self: into Chameleon templates as context."""
        
        super().__init__(**kwargs)
        self.__dict__ = self

    @property
    def html(self):
        """Create a Chameleon template from :self.template:.
        Spread :self: and :self.attrib: into render method on Chameleon template."""
        return PageTemplate(self.template).render(**{"attrib": self.attrib, **self})

Code a simple example of _Component to see if desired results are achieved.

In [56]:
class SuccessAlert(dict, _Component):
    template = """<div tal:attributes="attrib">
  ${inner_content}
</div>"""
    attrib={"class": "alert alert-success", "role": "alert"}
In [57]:
# Only the inner content changes.

success_alert = SuccessAlert(
    inner_content="A simple success alert—check it out!",
)
success_alert
Out[57]:
{'inner_content': 'A simple success alert—check it out!'}
In [58]:
display(HTML(success_alert.html))

Get the the eight required contextual classes

Extract the eight contextual classes from the example HTML in the Bootstrap docs.

Create classes programatically with type to render all the supported alerts.

In [59]:
from lxml import etree
from faker import Faker

fake = Faker()
In [60]:
alerts_html = """<div class="alert alert-primary" role="alert">
  A simple primary alert—check it out!
</div>
<div class="alert alert-secondary" role="alert">
  A simple secondary alert—check it out!
</div>
<div class="alert alert-success" role="alert">
  A simple success alert—check it out!
</div>
<div class="alert alert-danger" role="alert">
  A simple danger alert—check it out!
</div>
<div class="alert alert-warning" role="alert">
  A simple warning alert—check it out!
</div>
<div class="alert alert-info" role="alert">
  A simple info alert—check it out!
</div>
<div class="alert alert-light" role="alert">
  A simple light alert—check it out!
</div>
<div class="alert alert-dark" role="alert">
  A simple dark alert—check it out!
</div>"""

tree = etree.fromstring(alerts_html, etree.HTMLParser())
In [61]:
CLASS = "class"
ALERT_CLASSES_ATTRIBS = [
    e.attrib for e in tree.iterdescendants() if e.attrib
]
ALERT_CLASSES_ATTRIBS
Out[61]:
[{'class': 'alert alert-primary', 'role': 'alert'},
 {'class': 'alert alert-secondary', 'role': 'alert'},
 {'class': 'alert alert-success', 'role': 'alert'},
 {'class': 'alert alert-danger', 'role': 'alert'},
 {'class': 'alert alert-warning', 'role': 'alert'},
 {'class': 'alert alert-info', 'role': 'alert'},
 {'class': 'alert alert-light', 'role': 'alert'},
 {'class': 'alert alert-dark', 'role': 'alert'}]
In [62]:
COMPONENT_CLASS_NAMES = [
    "".join(word.title() for word in item[CLASS].split()[-1].split("-"))
    for item in ALERT_CLASSES_ATTRIBS
]
ROLE_LOOKUP = dict(zip(COMPONENT_CLASS_NAMES, ALERT_CLASSES_ATTRIBS))
ALERT_TEMPLATE = """<div tal:attributes="attrib">
  ${inner_content}
</div>"""

Each kind of Bootstrap alert is generated by looping over the classes created from parsing Bootstrap example and using the type type to make classes.

In [63]:
ALERT_COMPONENTS = dict(
    zip(
        COMPONENT_CLASS_NAMES,
        (
            type(
                class_name,
                (dict, _Component),
                dict(template=ALERT_TEMPLATE, attrib=ROLE_LOOKUP[class_name]),
            )
            for class_name in COMPONENT_CLASS_NAMES
        ),
    )
)
ALERT_COMPONENTS
Out[63]:
{'AlertPrimary': abc.AlertPrimary,
 'AlertSecondary': abc.AlertSecondary,
 'AlertSuccess': abc.AlertSuccess,
 'AlertDanger': abc.AlertDanger,
 'AlertWarning': abc.AlertWarning,
 'AlertInfo': abc.AlertInfo,
 'AlertLight': abc.AlertLight,
 'AlertDark': abc.AlertDark}
In [64]:
for key, value in ALERT_COMPONENTS.items():
    inner_html = value(inner_content=fake.paragraph()).html
    print(inner_html)
    display(HTML(inner_html))
<div class="alert alert-primary" role="alert">
  Four color TV let heavy on figure. Opportunity partner wife inside only.
</div>
<div class="alert alert-secondary" role="alert">
  Close tax tonight as. Head lose through growth establish speech truth. Generation green morning force.
</div>
<div class="alert alert-success" role="alert">
  My surface natural arm yourself stay black. Owner tonight official vote.
</div>
<div class="alert alert-danger" role="alert">
  More address real write skin media. Sell enough beautiful still.
</div>
<div class="alert alert-warning" role="alert">
  City blood issue go try. Star new story under real.
</div>
<div class="alert alert-info" role="alert">
  Strategy anyone here marriage involve entire never step. Prove claim likely note. Operation remain way material treat Mr paper help.
</div>
<div class="alert alert-light" role="alert">
  Wonder for management color themselves. Reflect glass product phone decide face.
</div>
<div class="alert alert-dark" role="alert">
  Soon need event she.
</div>

Use the PyGithub library to interact with GitHub.

Use the PyGithub library.

Resources

In [1]:
from pathlib import Path
import itertools as it
from datetime import datetime, timedelta
In [2]:
from github import Github
from faker import Faker
In [3]:
fake = Faker()
In [4]:
GITHUB_USER = "dm-wyncode"
GITHUBPWD = Path(Path.home(), '.texpander/github_wc_pwd.txt').read_text().strip()
In [5]:
github = Github(GITHUB_USER, GITHUBPWD)
github_user = github.get_user()
for repo in it.islice(github_user.get_repos(), 0, 5):
    print(repo.name)
sharemyride
popgage
fll_hackathon
team-practice
ansible-create-users
In [6]:
[item for item in dir(github_user) if not item.startswith("_") and "repo" in item]
Out[6]:
['create_repo',
 'get_repo',
 'get_repos',
 'owned_private_repos',
 'public_repos',
 'repos_url',
 'total_private_repos']

Output of github_user.create_repo??

Signature:
github_user.create_repo(
    ['name', 'description=NotSet', 'homepage=NotSet', 'private=NotSet', 'has_issues=NotSet', 'has_wiki=NotSet', 'has_downloads=NotSet', 'has_projects=NotSet', 'auto_init=NotSet', 'license_template=NotSet', 'gitignore_template=NotSet', 'allow_squash_merge=NotSet', 'allow_merge_commit=NotSet', 'allow_rebase_merge=NotSet'],
)
In [7]:
FAKE_REPO_NAME = '-'.join(fake.catch_phrase().lower().split())
DESCRIPTION = fake.paragraph()
new_repo = github_user.create_repo(FAKE_REPO_NAME, description=DESCRIPTION)
In [8]:
print(new_repo.ssh_url)
git@github.com:dm-wyncode/open-source-coherent-flexibility.git
In [9]:
[item for item in dir(new_repo) if not item.startswith("_") and "delete" in item]
Out[9]:
['delete', 'delete_file']
In [10]:
TIMEDELTA = timedelta(hours=5)
In [11]:
for repo in (
    repo
    for repo in github_user.get_repos()
    if datetime.utcnow() - repo.updated_at <= TIMEDELTA
):
    repo.delete()
    print(f'"{repo.name}" has been deleted.')
"open-source-coherent-flexibility" has been deleted.
"re-engineered-bandwidth-monitored-product" has been deleted.
"versatile-zero-defect-focus-group" has been deleted.

Answer to Stack Oveflow Question: Select Pandas DF Rows including any string in a list of strings

A possible solution to a Stack Overflow question.

In [42]:
import pandas as pd
In [49]:
# This URL doesn't return CSV.
CSV_URL = "https://drive.google.com/open?id=1rwg8c2GmtqLeGGv1xm9w6kS98iqgd6vW"
# Data file saved from within a browser to help with question.

# I stored the BitcoinData.csv data on my Minio server.
df = pd.read_csv("https://minio.apps.selfip.com/mymedia/csv/BitcoinData.csv")

selected_words = [
    "accept",
    "believe",
    "trust",
    "accepted",
    "accepts",
    "trusts",
    "believes",
    "acceptance",
    "trusted",
    "trusting",
    "accepting",
    "believes",
    "believing",
    "believed",
    "normal",
    "normalize",
    " normalized",
    "routine",
    "belief",
    "faith",
    "confidence",
    "adoption",
    "adopt",
    "adopted",
    "embrace",
    "approve",
    "approval",
    "approved",
    "approves",
]
In [44]:
%%timeit
mask = pd.Series(any(word in item for word in selected_words) for item in df["story"])
18.2 ms ± 115 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
In [45]:
%%timeit

df[mask]
947 µs ± 7.18 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
In [46]:
%%timeit

df[df.story.str.contains('|'.join(selected_words))]
128 ms ± 413 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
In [50]:
df[mask] == df[df.story.str.contains('|'.join(selected_words))]
Out[50]:
story date sentiment bitcoin price
0 True True True True
1 True True True True
2 True True True True
3 True True True True
5 True True True True
6 True True True True
7 True True True True
8 True True True True
9 True True True True
10 True True True True
13 True True True True
15 True True True True
16 True True True True
17 True True True True
20 True True True True
21 True True True True
22 True True True True
25 True True True True
26 True True True True
28 True True True True
29 True True True True
30 True True True True
31 True True True True
32 True True True True
33 True True True True
34 True True True True
35 True True True True
36 True True True True
37 True True True True
38 True True True True
... ... ... ... ...
372 True True True True
373 True True True True
374 True True True True
375 True True True True
377 True True True True
378 True True True True
379 True True True True
380 True True True True
381 True True True True
382 True True True True
383 True True True True
384 True True True True
386 True True True True
387 True True True True
388 True True True True
390 True True True True
391 True True True True
392 True True True True
393 True True True True
395 True True True True
397 True True True True
398 True True True True
401 True True True True
402 True True True True
404 True True True True
405 True True True True
407 True True True True
408 True True True True
409 True True True True
410 True True True True

336 rows × 4 columns

In [51]:
df
Out[51]:
story date sentiment bitcoin price
0 it was a curious choice for the good wife an e... 1/16/12 0 $6.68
1 when he was a yale law school student reuben g... 4/7/13 0 $162.30
2 video bitcoin has real world investors the tim... 4/11/13 0 $124.90
3 bitcoin s wild ride may not have been the bigg... 4/14/13 0 $90.00
4 amid the incense cheap art and herbal remedies... 5/6/13 1 $112.30
5 san francisco eight years ago ernie allen the ... 5/29/13 0 $132.30
6 12 34 p m updated this post was changed to pro... 5/29/13 1 $132.30
7 agaton strom for the new york timestyler left ... 7/1/13 1 $88.05
8 what is a bitcoin as a virtual currency it is ... 7/1/13 1 $88.05
9 photocredit bitcoin has been promoted as an al... 7/2/13 1 $90.13
10 depending on whom you ask bitcoins are a goofy... 7/31/13 1 $106.09
11 michael appleton for the new york timesbenjami... 8/13/13 0 $109.00
12 photocredit state and federal officials are st... 8/14/13 0 $112.56
13 photobarry silbert the chief executive of seco... 9/25/13 1 $128.22
14 any delay in raising the debt ceiling would h... 9/28/13 0 $134.78
15 nearly everything about silk road was shrouded... 10/2/13 0 $114.13
16 photocredit silkroaddrugs org via associated p... 10/8/13 0 $126.00
17 the currency known as bitcoin a much hyped and... 10/30/13 1 $199.97
18 photoideas with currency chris larsen chief ex... 11/11/13 1 $342.44
19 photobenjamin m lawsky the new york financial ... 11/14/13 0 $420.20
20 photobitcoins have strong appeal for chinese i... 11/17/13 1 $492.11
21 photounlike cash a bitcoin transaction history... 11/18/13 0 $703.56
22 photojennifer shasky calvery left with mythili... 11/18/13 0 $703.56
23 photoa screen shot of fiatleak com which track... 11/19/13 1 $584.61
24 proponents of digital currencies like bitcoins... 11/21/13 1 $722.43
25 this week the chinese government issued its mo... 11/22/13 1 $771.44
26 bitcoin isn t ready for popular consumption an... 11/23/13 0 $797.82
27 two israeli computer scientists say they may h... 11/23/13 0 $797.82
28 video bitcoin believers while regulators debat... 11/24/13 1 $774.25
29 photocredit with mounting interest from promin... 11/25/13 0 $799.11
... ... ... ... ...
381 the russian operatives accused of hacking the ... 7/22/18 1 $7,418.49
382 san francisco hedge funds go to the cayman isl... 7/29/18 1 $8,218.46
383 i m still on vacation hiking and biking in var... 7/31/18 0 $7,780.44
384 san francisco pete roberts of nottingham engla... 8/20/18 0 $6,308.53
385 how do new york times journalists use technolo... 9/12/18 1 $6,351.80
386 massena n y the hulking aluminum plant in this... 9/19/18 0 $6,398.54
387 cryptocurrencies like bitcoin have lost over h... 9/25/18 0 $6,446.47
388 get the dealbook newsletter to make sense of m... 9/25/18 0 $6,446.47
389 get the dealbook newsletter to make sense of m... 9/27/18 1 $6,676.75
390 in november 2010 as the federal reserve embark... 10/1/18 0 $6,589.62
391 storey county nev an enormous plot of land in ... 11/1/18 1 $6,377.78
392 san francisco over the last year charlie shrem... 11/2/18 1 $6,388.44
393 san francisco the news on wall street this wee... 11/21/18 0 $4,602.17
394 two iranians were behind the ransomware attack... 11/28/18 0 $4,257.42
395 there s a large spot reserved in the popular i... 12/11/18 0 $3,424.59
396 get the dealbook newsletter to make sense of m... 12/13/18 0 $3,313.68
397 palo alto calif last year around this time a t... 12/27/18 1 $3,654.83
398 get the dealbook newsletter to make sense of m... 1/7/19 0 $4,025.25
399 tbilisi georgia for three years a windowless w... 1/22/19 1 $3,604.58
400 tehran stepping from a car into a muddy indust... 1/29/19 0 $3,448.12
401 san francisco don t look now storm clouds are ... 2/3/19 0 $3,464.01
402 seoul south korea kim ki won is keeping a secr... 2/10/19 0 $3,690.19
403 in 2017 jamie dimon jpmorgan chase s chief exe... 2/14/19 1 $3,616.88
404 blockchain the ingenious database technology b... 2/15/19 1 $3,620.81
405 this essay has been updated to reflect news de... 2/23/19 1 $4,142.53
406 san francisco some of the world s biggest inte... 2/28/19 1 $3,854.79
407 in october 2017 a swat team descended on james... 3/12/19 1 $3,909.16
408 tokyo a tokyo court handed down a two and a ha... 3/15/19 0 $3,960.91
409 san francisco paul chou was among the many bit... 4/2/19 0 $4,879.88
410 hong kong china is planning new steps that cou... 4/9/19 0 $5,204.96

411 rows × 4 columns

Generate Python Library Files with Cookiecutter

Generate files sufficient for creation of a Python library using Cookiecutter and cookiecutter-pylibrary.

See this post for resources.

In [1]:
from pathlib import Path
import json
import shlex
from subprocess import check_output, Popen, PIPE, STDOUT
import os
In [2]:
# Define directory where cookiecutter templates live.
templates_path = Path(Path.home(), "projects", "cookiecutters")
In [3]:
pylibrary_path = templates_path.joinpath("cookiecutter-pylibrary")
In [4]:
# Get latest version from GitHub.
GIT_COMMAND_PREFIX = f"git -C {pylibrary_path} "
commands = (
    "reset --hard HEAD",
    "checkout master",
    "pull origin master",
)
GIT_HARD_RESET, = (f"{GIT_COMMAND_PREFIX}{command}" for command in commands[:1])

for command in commands:
    p = Popen(
        shlex.split(f"{GIT_COMMAND_PREFIX}{command}"),
        stdout=PIPE,
        stderr=STDOUT,
        universal_newlines=True,
    )
    print(p.stdout.read())
HEAD is now at ae3a882 Try to explain optional fields (issue #156).

Already on 'master'
Your branch is up to date with 'origin/master'.

From github.com:dm-wyncode/cookiecutter-pylibrary
 * branch            master     -> FETCH_HEAD
Already up to date.

In [5]:
cookiecutter_json_path = pylibrary_path.joinpath("cookiecutter.json")
paths = (templates_path, pylibrary_path, cookiecutter_json_path)

for path in paths:
    assert path.exists(), f"No dir {path}"
    
cookiecutter_context = json.loads(cookiecutter_json_path.read_text())
context_keys = tuple(cookiecutter_context.keys())

posts_dir = Path(os.path.abspath(os.curdir))

print(check_output(shlex.split(GIT_HARD_RESET)).decode())
HEAD is now at ae3a882 Try to explain optional fields (issue #156).

Truncate view of existing cookiecutter_context.

In [6]:
def head(item: dict, n=5):
    return {key: item[key] for key in list(item.keys())[:n]}
In [7]:
head(cookiecutter_context)
Out[7]:
{'full_name': 'Ionel Cristian Maries',
 'email': 'contact@ionelmc.ro',
 'website': 'https://blog.ionelmc.ro',
 'project_name': 'Nameless',
 'repo_name': "python-{{ cookiecutter.project_name|lower|replace(' ','-') }}"}

The code below was created with the intent to use pexpect with cookiecutter like it is done in this example.

--no-input option is a better way.

In [8]:
DEFAULT = None
NAME = "foo"
inputs = dict(
    default=DEFAULT,
    full_name="Don Morehouse",
    email="dm.wyncode@gmail.com",
    website="https://zip.apps.selfip.com/",
    project_name=NAME,
    repo_name=DEFAULT,
    repo_hosting=DEFAULT,
    repo_hosting_domain="apps.selfip.com",  # I host my own gitolite git server
    repo_username="gitolite3",  # TODO make git server URL into SSH version
    package_name=NAME,
    distribution_name=NAME,
    project_short_description=DEFAULT,
    release_date=DEFAULT,
    year_from=DEFAULT,
    year_to=DEFAULT,
    version=DEFAULT,
    license=DEFAULT,
    c_extension_support=DEFAULT,
    c_extension_optional=DEFAULT,
    c_extension_module=DEFAULT,
    c_extension_function=DEFAULT,
    c_extension_test_pypi=DEFAULT,
    c_extension_test_pypi_username=DEFAULT,
    test_matrix_configurator=DEFAULT,
    test_matrix_separate_coverage=DEFAULT,
    test_runner=DEFAULT,
    setup_py_uses_test_runner=DEFAULT,
    setup_py_uses_setuptools_scm=DEFAULT,
    pypi_badge=DEFAULT,
    pypi_disable_upload=DEFAULT,
    allow_tests_inside_package=DEFAULT,
    linter=DEFAULT,
    command_line_interface=DEFAULT,
    command_line_interface_bin_name=DEFAULT,
    coveralls=DEFAULT,
    coveralls_token=DEFAULT,
    codecov=DEFAULT,
    landscape=DEFAULT,
    scrutinizer=DEFAULT,
    codacy=DEFAULT,
    codacy_projectid=DEFAULT,
    codeclimate=DEFAULT,
    sphinx_docs=DEFAULT,
    sphinx_theme=DEFAULT,
    sphinx_doctest=DEFAULT,
    sphinx_docs_hosting=f"https://static.apps.selfip.com/{NAME}_docs/",
    travis=DEFAULT,
    travis_osx=DEFAULT,
    appveyor=DEFAULT,
    requiresio=DEFAULT,
)

Diff the inputs keys and context_keys

In [9]:
set(context_keys).difference(set(inputs.keys()))
Out[9]:
{'_extensions'}
In [10]:
EXTENSIONS = "_extensions"
inputs.update({EXTENSIONS: cookiecutter_context[EXTENSIONS]})
In [11]:
assert not set(context_keys).difference(
    set(inputs.keys())
), "inputs not current with cookiecutter_context"
In [12]:
custom_context = {
    key: inputs[key] or value for key, value in cookiecutter_context.items()
}
In [13]:
head(custom_context)
Out[13]:
{'full_name': 'Don Morehouse',
 'email': 'dm.wyncode@gmail.com',
 'website': 'https://zip.apps.selfip.com/',
 'project_name': 'foo',
 'repo_name': "python-{{ cookiecutter.project_name|lower|replace(' ','-') }}"}

Write the custom_context to the Cookiecutter template overwriting the existing one.

In [14]:
cookiecutter_json_path.write_text(json.dumps(custom_context))
Out[14]:
2444
In [15]:
import tempfile
from pprint import pprint

BASE_DIR = Path(Path.home(), "projects", "cookie-cut-py-lib")

NAME = "foo"
PROJECT_NAME = f"python-{NAME}"
PREFIX = f"{PROJECT_NAME}-"
WORKING_DIR_NAME = tempfile.mkdtemp(prefix=PREFIX, dir=BASE_DIR)
WORKING_DIR = BASE_DIR.joinpath(WORKING_DIR_NAME)
assert WORKING_DIR.exists(), "No working directory exists."
COMMAND = f"cookiecutter {pylibrary_path} --no-input -o {WORKING_DIR}"

pprint(check_output(shlex.split(COMMAND)).decode().splitlines(), indent=4)
[   'bootstrap create: '
    '/home/dmmmd/projects/cookie-cut-py-lib/python-foo-fygknkss/python-foo/.tox/bootstrap',
    'bootstrap installdeps: jinja2, matrix, tox',
    'bootstrap installed: '
    'configparser2==4.0.0,filelock==3.0.12,importlib-metadata==0.23,Jinja2==2.10.3,MarkupSafe==1.1.1,matrix==2.0.1,more-itertools==7.2.0,packaging==19.2,pluggy==0.13.0,py==1.8.0,pyparsing==2.4.2,six==1.12.0,toml==0.10.0,tox==3.14.0,virtualenv==16.7.7,zipp==0.6.0',
    "bootstrap run-test-pre: PYTHONHASHSEED='4072576528'",
    'bootstrap run-test: commands[0] | python ci/bootstrap.py --no-env',
    'Project path: '
    '/home/dmmmd/projects/cookie-cut-py-lib/python-foo-fygknkss/python-foo',
    'Wrote .travis.yml',
    'Wrote .appveyor.yml',
    'DONE.',
    '___________________________________ summary '
    '____________________________________',
    '  bootstrap: commands succeeded',
    '  congratulations :)',
    '',
    '################################################################################',
    '',
    '    Generating CI configuration ...',
    '',
    '',
    '################################################################################',
    '################################################################################',
    '',
    '    You have succesfully created `python-foo`.',
    '',
    '################################################################################',
    '',
    "    You've used these cookiecutter parameters:",
    '',
    "        _extensions:               ['jinja2_time.TimeExtension']",
    '        _template:                 '
    "'/home/dmmmd/projects/cookiecutters/cookiecutter-pylibrary'",
    "        allow_tests_inside_package: 'no'",
    "        appveyor:                  'yes'",
    "        c_extension_function:      'longest'",
    "        c_extension_module:        '_foo'",
    "        c_extension_optional:      'no'",
    "        c_extension_support:       'no'",
    "        c_extension_test_pypi:     'no'",
    "        c_extension_test_pypi_username: 'gitolite3'",
    "        codacy:                    'no'",
    "        codacy_projectid:          '[Get ID from "
    "https://app.codacy.com/app/gitolite3/python-foo/settings]'",
    "        codeclimate:               'no'",
    "        codecov:                   'yes'",
    "        command_line_interface:    'plain'",
    "        command_line_interface_bin_name: 'foo'",
    "        coveralls:                 'no'",
    "        coveralls_token:           '[Required for Appveyor, take it from "
    "https://coveralls.io/github/gitolite3/python-foo]'",
    "        distribution_name:         'foo'",
    "        email:                     'dm.wyncode@gmail.com'",
    "        full_name:                 'Don Morehouse'",
    "        landscape:                 'no'",
    "        license:                   'BSD 2-Clause License'",
    "        linter:                    'flake8'",
    "        package_name:              'foo'",
    "        project_name:              'foo'",
    "        project_short_description: 'An example package. Generated with "
    "cookiecutter-pylibrary.'",
    "        pypi_badge:                'yes'",
    "        pypi_disable_upload:       'no'",
    "        release_date:              'today'",
    "        repo_hosting:              'github.com'",
    "        repo_hosting_domain:       'apps.selfip.com'",
    "        repo_name:                 'python-foo'",
    "        repo_username:             'gitolite3'",
    "        requiresio:                'yes'",
    "        scrutinizer:               'no'",
    "        setup_py_uses_setuptools_scm: 'no'",
    "        setup_py_uses_test_runner: 'no'",
    "        sphinx_docs:               'yes'",
    '        sphinx_docs_hosting:       '
    "'https://static.apps.selfip.com/foo_docs/'",
    "        sphinx_doctest:            'no'",
    "        sphinx_theme:              'sphinx-rtd-theme'",
    "        test_matrix_configurator:  'no'",
    "        test_matrix_separate_coverage: 'no'",
    "        test_runner:               'pytest'",
    "        travis:                    'yes'",
    "        travis_osx:                'no'",
    "        version:                   '0.0.0'",
    "        website:                   'https://zip.apps.selfip.com/'",
    "        year_from:                 '2019'",
    "        year_to:                   '2019'",
    '',
    '    See .cookiecutterrc for instructions on regenerating the project.',
    '',
    '################################################################################',
    '',
    '    To get started run these:',
    '',
    '        cd python-foo',
    '        git init',
    '        git add --all',
    '        git commit -m "Add initial project skeleton."',
    '        git remote add origin '
    'git@apps.selfip.com:gitolite3/python-foo.git',
    '        git push -u origin master',
    '',
    '',
    '    To regenerate your .travis.yml or .appveyor.yml run:',
    '',
    '',
    '        tox -e bootstrap',
    '',
    '    You can also run:',
    '',
    '        ./ci/bootstrap.py',
    '',
    '']

The results look as expected.

In [16]:
os.chdir(WORKING_DIR)
print(Path(*Path(os.curdir).absolute().parts[3:]))
os.chdir(PROJECT_NAME)
projects/cookie-cut-py-lib/python-foo-fygknkss
In [17]:
!ls
AUTHORS.rst    CONTRIBUTING.rst  MANIFEST.in	 setup.cfg  tests
CHANGELOG.rst  docs		 pyproject.toml  setup.py   tox.ini
ci	       LICENSE		 README.rst	 src
In [18]:
p = Popen(shlex.split("tox -p auto"), stdout=PIPE, stderr=STDOUT)
p.wait()
output = p.stdout.read()
In [19]:
lines = output.decode().splitlines()
START = "  clean: commands succeeded"
pprint("\n".join(lines[lines.index(START) + 1:]))
('ERROR:   check: parallel child exit code 1\n'
 'ERROR:   docs: parallel child exit code 1\n'
 '  py27: commands succeeded\n'
 'ERROR:   py34: parallel child exit code 1\n'
 'ERROR:   py35: parallel child exit code 1\n'
 '  py36: commands succeeded\n'
 '  py37: commands succeeded\n'
 'ERROR:   pypy: parallel child exit code 1\n'
 'ERROR:   pypy3: parallel child exit code 1\n'
 '  report: commands succeeded')

Install the library

In [20]:
!pip install -e .
Obtaining file:///home/dmmmd/projects/cookie-cut-py-lib/python-foo-fygknkss/python-foo
  Installing build dependencies ... done
  Getting requirements to build wheel ... done
    Preparing wheel metadata ... done
Installing collected packages: foo
  Running setup.py develop for foo
Successfully installed foo

Run the foo executable

In [21]:
!foo
['/home/dmmmd/.virtualenvs/seven-notebooks/bin/foo']

Source for executable 'foo'

This code was autogenerated by the Cookiecutter template.

In [22]:
# %load src/foo/cli.py
"""
Module that contains the command line app.

Why does this file exist, and why not put this in __main__?

  You might be tempted to import things from __main__ later, but that will cause
  problems: the code will get executed twice:

  - When you run `python -mfoo` python will execute
    ``__main__.py`` as a script. That means there won't be any
    ``foo.__main__`` in ``sys.modules``.
  - When you import __main__ it will get executed again (as a module) because
    there's no ``foo.__main__`` in ``sys.modules``.

  Also see (1) from http://click.pocoo.org/5/setuptools/#setuptools-integration
"""
import sys


def main(argv=sys.argv):
    """
    Args:
        argv (list): List of arguments

    Returns:
        int: A return code

    Does stuff.
    """
    print(argv)
    return 0

Display coverage index.html generated during tox.

In [23]:
from IPython.display import display, HTML
In [24]:
index_file = Path('htmlcov', 'index.html')
display(HTML(index_file.read_text()))
Coverage report
Hide keyboard shortcuts

Hot-keys on this page

n s m x b p c   change column sorting

Module statements missing excluded branches partial coverage
Total 12 4 0 2 0 57.14%
src/foo/__init__.py 1 0 0 0 0 100.00%
src/foo/__main__.py 4 4 0 2 0 0.00%
src/foo/cli.py 4 0 0 0 0 100.00%
tests/test_foo.py 3 0 0 0 0 100.00%

No items found using the specified filter.

Display auto generated documentation.

This documentation was auto-generated from the library code when tox was run.

In [25]:
doc_index_file = Path('dist', 'docs', 'index.html')
display(HTML(doc_index_file.read_text()))
Contents — foo 0.0.0 documentation

Todo

  • Learn more about Tox
  • Learn more about the other options the template offers.
  • Tweak the template to use private Git server Gitolite

Notes to self.

  • tox -p auto will throw errors without having done a git commit first.
  • packaging step: python setup.py sdist

This step not found in the tox.ini generated by cookiecutter-pylibrary

Note that for this operation the same Python environment will be used as the one tox is installed into (therefore you need to make sure that it contains your build dependencies). Skip this step for application projects that don’t have a setup.py

Extract Content from an "href" Attribute of an "A" tag in HTML using Python

Stack Overflow solution

Initially my first thought was that this question on Stack Overflow was stupid anti-pattern because anybody can split strings to get wanted data. I consider it a hack though if the data has a scheme and could be parsed by a proper parser.

The poser of the question changed the question to clarify that he wanted . I'm happy he did because I learned something new. I have been creating the data URLs to embed into a website by joining strings.

It turns out there is a parser and its a well-defined scheme for data URLs. Usually when I think "this is stupid anti-pattern" I then do some research and learn something along the way. Though some things do remain stupid anti-pattern and knowing which ones are still stupid anti-pattern is a skill, too.

For example, trying to parse HTML with regular expressions is stupid anti-pattern.

Resources

In [1]:
html_string = """
<a href="data:text/csv;charset=UTF-8,csvcontentfollows">
<a href="data:text/csv;charset=UTF-8,csvcontentfollows">
<a href="data:text/csv;charset=UTF-8,csvcontentfollows">
"""

Update: A comment on Stack Overflow reveals there is native Python support for data URIs.

In [3]:
from contextlib import ExitStack
from urllib.request import urlopen
import lxml.etree

HREF = "href"

tree = lxml.etree.fromstring(html_string, lxml.etree.HTMLParser())

uris = (
    item.attrib[HREF]
    for item in tree.iterdescendants()
    if HREF in item.attrib
)

with ExitStack() as stack:
    resources = (stack.enter_context(urlopen(uri)) for uri in uris)
    data = [fh.read().decode() for fh in resources]
print(data)
['csvcontentfollows', 'csvcontentfollows', 'csvcontentfollows']

Run a Python Script on a Schedule Using the sched Module.

Stack Overflow solution

First accepted answer on Stack Overflow

In [1]:
import sched
import time
from datetime import datetime, timedelta

# Create a scheduler instance.
scheduler = sched.scheduler(timefunc=time.time)

def reschedule(interval: dict=None):
    """Define how often the action function will run.
    Pass a dict interval {'hours': 1} to make it run every hour.
    """
    interval = {'minutes': 1} if interval is None else interval
    # Get the current time and remove the seconds and microseconds.
    now = datetime.now().replace(second=0, microsecond=0)
    # Add the time interval to now
    target = now + timedelta(**interval)
    # Schedule the task
    scheduler.enterabs(target.timestamp(), priority=0, action=get_report)

def get_report(analytics=None):
    # replace the print call with the code execute the Google API call
    print(time.ctime())
    
    reschedule() # Reschedule so it runs again.

if __name__ == "__main__":
    reschedule() # start

    try:
        scheduler.run(blocking=True)
    except KeyboardInterrupt:
        print('Stopped.')
Tue Oct 29 22:35:00 2019
Tue Oct 29 22:36:00 2019
Stopped.

Use the HTMLParser class in Python: a Stack Overflow Answer Submission

A solution to a Stack Overflow question.

Why parsing HTML with regular expressions is an anti-pattern.

In [6]:
from html.parser import HTMLParser

class MyHTMLParser(HTMLParser):

    def __init__(self):
        super().__init__()
        self.data = []
        self.a_tag = None

    def handle_starttag(self, tag, attrs):
        if tag == "a":
            self.a_tag = True

    def handle_data(self, data):
        if self.a_tag:
            self.data.append(data)
            self.a_tag = False

string = """aaa<a class="c-item_foot" href="/news/a/">11r11</a></div>bbb<a class="c-item_foot" href="/news/b/">222</a></div>ccgc<a class="c-item_foot" href="/news/c/">3333a333</a></div>ddd<a class="c-item_foot" href="/news/d/">44a444444</a></div>eee"""
parser = MyHTMLParser()
parser.feed(string)
print(parser.data)
['11r11', '222', '3333a333', '44a444444']

Visualize an HTML document in a Pandas Dataframe with MultiIndex

In [4]:
import itertools as it
import urllib
from io import BytesIO
In [5]:
from lxml import etree
import pandas as pd
In [6]:
html_parser = etree.HTMLParser()
In [7]:
def get_dom(html):
    return etree.fromstring(html, html_parser)


def get_indexed_data(dom):
    *index, data = zip(
        *it.chain(
            *filter(
                lambda x: x,
                (
                    [[element.tag, k, v] for k, v in element.attrib.items()]
                    for element in dom.iterdescendants()
                ),
            )
        )
    )
    index = pd.MultiIndex.from_tuples(
        list(zip(*index)), names=["tag name", "attribute"]
    )
    return pd.DataFrame(list(data), columns=["value"], index=index)
In [8]:
def get_df_from(url):
    with urllib.request.urlopen(url) as fh:
        b = BytesIO(fh.read())
    b.seek(0)
    return get_indexed_data(get_dom(b.read().decode("utf-8")))

Jupyter notebook styling right aligns cells. Force left align.

In [2]:
%%html

<style>
   table {border-collapse:collapse; table-layout:fixed; width:auto;}
   table td {border:solid 1px #fab; width:110px; word-wrap:break-word; text-align:left !important;}
   table th {text-align:left !important;}
</style>
In [10]:
if __name__ == "__main__":
    from IPython.display import display, HTML

    display(HTML(get_df_from("https://twitter.com/").to_html()))
value
tag name attribute
meta charset utf-8
script nonce 5qzYbeENFBjkM6bQn6jGIA==
id bouncer_terminate_iframe
nonce 5qzYbeENFBjkM6bQn6jGIA==
id resolve_inline_redirects
nonce 5qzYbeENFBjkM6bQn6jGIA==
id swift_action_queue
nonce 5qzYbeENFBjkM6bQn6jGIA==
id composition_state
nonce 5qzYbeENFBjkM6bQn6jGIA==
link rel stylesheet
href https://abs.twimg.com/a/1571872955/css/t1/twit...
class coreCSSBundles
rel stylesheet
class moreCSSBundles
href https://abs.twimg.com/a/1571872955/css/t1/twit...
rel stylesheet
class moreCSSBundles
href https://abs.twimg.com/a/1571872955/css/t1/twit...
rel dns-prefetch
href https://pbs.twimg.com
rel dns-prefetch
href https://t.co
rel preload
href https://abs.twimg.com/k/en/init.en.3e84071c3bc...
as script
rel preload
href https://abs.twimg.com/k/en/0.commons.en.06a750...
as script
meta name robots
content NOODP
name description
content From breaking news and entertainment to sports...
name msapplication-TileImage
content //abs.twimg.com/favicons/win8-tile-144.png
name msapplication-TileColor
content #00aced
link rel mask-icon
sizes any
href https://abs.twimg.com/a/1571872955/icons/favic...
color #1da1f2
rel shortcut icon
href //abs.twimg.com/favicons/favicon.ico
type image/x-icon
rel apple-touch-icon
href https://abs.twimg.com/icons/apple-touch-icon-1...
sizes 192x192
rel manifest
href /manifest.json
meta name swift-page-name
id swift-page-name
content front
name swift-page-section
id swift-section-name
content front
link rel canonical
href https://twitter.com/
rel alternate
hreflang x-default
href https://twitter.com/
rel alternate
hreflang fr
href https://twitter.com/?lang=fr
rel alternate
hreflang en
href https://twitter.com/?lang=en
rel alternate
hreflang ar
href https://twitter.com/?lang=ar
rel alternate
hreflang ja
href https://twitter.com/?lang=ja
rel alternate
hreflang es
href https://twitter.com/?lang=es
rel alternate
hreflang de
href https://twitter.com/?lang=de
rel alternate
hreflang it
href https://twitter.com/?lang=it
rel alternate
hreflang id
href https://twitter.com/?lang=id
rel alternate
hreflang pt
href https://twitter.com/?lang=pt
rel alternate
hreflang ko
href https://twitter.com/?lang=ko
rel alternate
hreflang tr
href https://twitter.com/?lang=tr
rel alternate
hreflang ru
href https://twitter.com/?lang=ru
rel alternate
hreflang nl
href https://twitter.com/?lang=nl
rel alternate
hreflang fil
href https://twitter.com/?lang=fil
rel alternate
hreflang ms
href https://twitter.com/?lang=ms
rel alternate
hreflang zh-tw
href https://twitter.com/?lang=zh-tw
rel alternate
hreflang zh-cn
href https://twitter.com/?lang=zh-cn
rel alternate
hreflang hi
href https://twitter.com/?lang=hi
rel alternate
hreflang no
href https://twitter.com/?lang=no
rel alternate
hreflang sv
href https://twitter.com/?lang=sv
rel alternate
hreflang fi
href https://twitter.com/?lang=fi
rel alternate
hreflang da
href https://twitter.com/?lang=da
rel alternate
hreflang pl
href https://twitter.com/?lang=pl
rel alternate
hreflang hu
href https://twitter.com/?lang=hu
rel alternate
hreflang fa
href https://twitter.com/?lang=fa
rel alternate
hreflang he
href https://twitter.com/?lang=he
rel alternate
hreflang ur
href https://twitter.com/?lang=ur
rel alternate
hreflang th
href https://twitter.com/?lang=th
rel alternate
hreflang uk
href https://twitter.com/?lang=uk
rel alternate
hreflang ca
href https://twitter.com/?lang=ca
rel alternate
hreflang ga
href https://twitter.com/?lang=ga
rel alternate
hreflang el
href https://twitter.com/?lang=el
rel alternate
hreflang eu
href https://twitter.com/?lang=eu
rel alternate
hreflang cs
href https://twitter.com/?lang=cs
rel alternate
hreflang gl
href https://twitter.com/?lang=gl
rel alternate
hreflang ro
href https://twitter.com/?lang=ro
rel alternate
hreflang hr
href https://twitter.com/?lang=hr
rel alternate
hreflang en-gb
href https://twitter.com/?lang=en-gb
rel alternate
hreflang vi
href https://twitter.com/?lang=vi
rel alternate
hreflang bn
href https://twitter.com/?lang=bn
rel alternate
hreflang bg
href https://twitter.com/?lang=bg
rel alternate
hreflang sr
href https://twitter.com/?lang=sr
rel alternate
hreflang sk
href https://twitter.com/?lang=sk
rel alternate
hreflang gu
href https://twitter.com/?lang=gu
rel alternate
hreflang mr
href https://twitter.com/?lang=mr
rel alternate
hreflang ta
href https://twitter.com/?lang=ta
rel alternate
hreflang kn
href https://twitter.com/?lang=kn
rel alternate
media handheld, only screen and (max-width: 640px)
href https://mobile.twitter.com/
rel alternate
href android-app://com.twitter.android/twitter/fron...
rel search
type application/opensearchdescription+xml
href /opensearch.xml
title Twitter
id async-css-placeholder
body class three-col logged-out static-logged-out-home-page
data-fouc-class-names swift-loading no-nav-banners
dir ltr
script id swift_loading_indicator
nonce 5qzYbeENFBjkM6bQn6jGIA==
form action https://mobile.twitter.com/i/nojs_router?path=%2F
method POST
class NoScriptForm
input type hidden
value 84a68ea7047b5f969683265536ec597ebdfd4bb3
name authenticity_token
div class NoScriptForm-content
span class NoScriptForm-logo Icon Icon--logo Icon--extraL...
p class NoScriptForm-buttonContainer
button type submit
class EdgeButton EdgeButton--primary
a href #timeline
class u-hiddenVisually focusable
div id doc
data-at-shortcutkeys {"Enter":"Open Tweet details","o":"Expand phot...
class
class StaticLoggedOutHomePage
class StaticLoggedOutHomePage-content
class StaticLoggedOutHomePage-cell StaticLoggedOutHo...
class StaticLoggedOutHomePage-login
form action https://twitter.com/sessions
class LoginForm js-front-signin
method post
data-component login_callout
data-element form
div class LoginForm-input LoginForm-username
input type text
class text-input email-input js-signin-email
name session[username_or_email]
autocomplete username
placeholder Phone, email, or username
div class LoginForm-input LoginForm-password
input type password
class text-input
name session[password]
placeholder Password
autocomplete current-password
div class LoginForm-staticForgot
a class forgot
href /account/begin_password_reset
rel noopener
input type submit
class EdgeButton EdgeButton--secondary EdgeButton--m...
value Log in
type hidden
name return_to_ssl
value true
type hidden
name scribe_log
type hidden
name redirect_after_login
value /
type hidden
value 84a68ea7047b5f969683265536ec597ebdfd4bb3
name authenticity_token
type hidden
name ui_metrics
autocomplete off
script src /i/js_inst?c_name=ui_metrics
async
div class StaticLoggedOutHomePage-signupBlock
class StaticLoggedOutHomePage-signupHeader
span class Icon Icon--bird
a class StaticLoggedOutHomePage-input StaticLoggedOutH...
href /login
h1 class StaticLoggedOutHomePage-signupTitle
div class StaticLoggedOutHomePage-noSignupForm
h2 class StaticLoggedOutHomePage-signupSubtitle
div class StaticLoggedOutHomePage-buttons
a class js-nav EdgeButton EdgeButton--medium EdgeButto...
href https://twitter.com/signup
class js-nav EdgeButton EdgeButton--medium EdgeButto...
href /login
div class StaticLoggedOutHomePage-cell StaticLoggedOutHo...
<cyfunction ProcessingInstruction at 0x7efced9be3f8> version 1.0
encoding UTF-8
svg class twitterIcon-bird
viewbox 0 0 1208 982
version 1.1
xmlns http://www.w3.org/2000/svg
xmlns:xlink http://www.w3.org/1999/xlink
g id Final-Horizon
stroke none
stroke-width 1
fill none
fill-rule evenodd
id Artboard
transform translate(-286.000000, -117.000000)
fill-rule nonzero
fill #1B95E0
path d M1493.75308,233.195911 C1449.31783,252.922544 ...
id bird
div class StaticLoggedOutHomePage-communicationContent
class StaticLoggedOutHomePage-communicationItem
span class Icon Icon--search
div class StaticLoggedOutHomePage-communicationItem
span class Icon Icon--people
div class StaticLoggedOutHomePage-communicationItem
span class Icon Icon--reply
div class front-warning
a href http://m.twitter.com
rel noopener
div class front-warning
id front-no-cookies-warn
class StreamsFooter StreamsFooter--fixed
ul class StreamsFooter-list u-cf
li class StreamsFooter-item
a href /about
rel noopener
li class StreamsFooter-item
a href //support.twitter.com
rel noopener
li class StreamsFooter-item
a href https://blog.twitter.com
rel noopener
li class StreamsFooter-item
a href http://status.twitter.com
rel noopener
li class StreamsFooter-item
a href https://about.twitter.com/careers
rel noopener
li class StreamsFooter-item
a href /tos
rel noopener
li class StreamsFooter-item
a href /privacy
rel noopener
li class StreamsFooter-item
a href //support.twitter.com/articles/20170514
rel noopener
li class StreamsFooter-item
a href //business.twitter.com/en/help/troubleshooting...
rel noopener
li class StreamsFooter-item
a href //about.twitter.com/press/brand-assets
rel noopener
li class StreamsFooter-item
a href https://about.twitter.com/products
rel noopener
li class StreamsFooter-item
a href //ads.twitter.com/?ref=gl-tw-tw-twitter-advertise
rel noopener
li class StreamsFooter-item
a href https://marketing.twitter.com
rel noopener
li class StreamsFooter-item
a href https://business.twitter.com
rel noopener
li class StreamsFooter-item
a href //dev.twitter.com
rel noopener
li class StreamsFooter-item
a href /i/directory/profiles
rel noopener
li class StreamsFooter-item
a href /settings/personalization
rel noopener
li class StreamsFooter-item StreamsFooter-copyright
div class alert-messages hidden
id message-drawer
class message
class message-inside
span class message-text
a role button
class Icon Icon--close Icon--medium dismiss
href #
span class visuallyhidden
div class gallery-overlay
class Gallery with-tweet
style class Gallery-styles
div class Gallery-closeTarget
class Gallery-content
class GalleryTweet-newsCameraBadge
button type button
class modal-btn modal-close modal-close-fixed js-close
span class Icon Icon--close Icon--large
class visuallyhidden
div class Gallery-media
class GalleryNav GalleryNav--prev
span class GalleryNav-handle GalleryNav-handle--prev
class Icon Icon--caretLeft Icon--large
class u-hiddenVisually
div class GalleryNav GalleryNav--next
span class GalleryNav-handle GalleryNav-handle--next
class Icon Icon--caretRight Icon--large
class u-hiddenVisually
div class GalleryTweet
class modal-overlay
id profile-hover-container
id goto-user-dialog
class modal-container
class modal modal-small draggable
class modal-content
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title
div class modal-body
class modal-inner
form class t1-form goto-user-form
input class input-block username-input
type text
placeholder Start typing a name to jump to a profile
aria-label User
div role listbox
class dropdown-menu typeahead
aria-hidden true
class dropdown-caret
class caret-outer
class caret-inner
role presentation
class dropdown-inner js-typeahead-results
role presentation
class typeahead-saved-searches
h3 id saved-searches-heading
class typeahead-category-title saved-searches-title
ul role presentation
class typeahead-items saved-searches-list
li role presentation
class typeahead-item typeahead-saved-search-item
span class Icon Icon--close
aria-hidden true
class visuallyhidden
a role option
aria-describedby saved-searches-heading
class js-nav
href
data-search-query
data-query-source
data-ds saved_search
tabindex -1
ul role presentation
class typeahead-items typeahead-topics
li role presentation
class typeahead-item typeahead-topic-item
a role option
class js-nav
href
data-search-query
data-query-source typeahead_click
data-ds topics
tabindex -1
ul role presentation
class typeahead-items typeahead-accounts social-cont...
li role presentation
data-user-id
data-user-screenname
data-remote true
data-score
class typeahead-item typeahead-account-item js-selec...
a role option
class js-nav
data-query-source typeahead_click
data-search-query
data-ds account
div class js-selectable typeahead-in-conversation hidden
span class Icon Icon--follower Icon--small
class typeahead-in-conversation-text
img class avatar size32
alt
span class typeahead-user-item-info account-group
class fullname
class UserBadges
class Icon Icon--verified js-verified hidden
class u-hiddenVisually
class Icon Icon--protected js-protected hidden
class u-hiddenVisually
class UserNameBreak
class username u-dir
dir ltr
class typeahead-social-context
li role presentation
class js-selectable typeahead-accounts-shortcut js-s...
a role option
class js-nav
href
data-search-query
data-query-source typeahead_click
data-shortcut true
data-ds account_search
ul role presentation
class typeahead-items typeahead-trend-locations-list
li role presentation
class typeahead-item typeahead-trend-locations-item
a role option
class js-nav
href
data-ds trend_location
data-search-query
tabindex -1
div role presentation
class typeahead-user-select
role presentation
class typeahead-empty-suggestions
ul role presentation
class typeahead-items typeahead-selected js-typeahea...
li role presentation
data-user-id
data-user-screenname
data-remote true
data-score
class typeahead-item typeahead-selected-item js-sele...
a role option
class js-nav
data-query-source typeahead_click
data-search-query
data-ds account
img class avatar size32
alt
span class typeahead-user-item-info account-group
class select-status deselect-user js-deselect-user I...
class select-status select-disabled Icon Icon--unfollow
class fullname
class UserBadges
class Icon Icon--verified js-verified hidden
class u-hiddenVisually
class Icon Icon--protected js-protected hidden
class u-hiddenVisually
class UserNameBreak
class username u-dir
dir ltr
li role presentation
class typeahead-selected-end
ul role presentation
class typeahead-items typeahead-accounts js-typeahea...
li role presentation
data-user-id
data-user-screenname
data-remote true
data-score
class typeahead-item typeahead-account-item js-selec...
a role option
class js-nav
data-query-source typeahead_click
data-search-query
data-ds account
img class avatar size32
alt
span class typeahead-user-item-info account-group
class select-status deselect-user js-deselect-user I...
class select-status select-disabled Icon Icon--unfollow
class fullname
class UserBadges
class Icon Icon--verified js-verified hidden
class u-hiddenVisually
class Icon Icon--protected js-protected hidden
class u-hiddenVisually
class UserNameBreak
class username u-dir
dir ltr
li role presentation
class typeahead-accounts-end
div role presentation
class typeahead-dm-conversations
ul role presentation
class typeahead-items typeahead-dm-conversation-items
li role presentation
class typeahead-item typeahead-dm-conversation-item
a role option
tabindex -1
div id quick-promote-dialog
class QuickPromoteDialog modal-container
class modal draggable
class modal-content
button type button
class modal-btn modal-close modal-close-fixed js-close
span class Icon Icon--close Icon--large
class visuallyhidden
div class modal-header
h3 class modal-title
div class modal-body
class quick-promote-view-container
class media
iframe class quick-promote-iframe js-initial-focus
scrolling no
frameborder 0
src
div id block-user-dialog
class modal-container
class modal draggable
class modal-content
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title
div class tweet-loading
class spinner-bigger
class modal-body modal-tweet
class modal-footer
button class EdgeButton EdgeButton--tertiary cancel-action ...
class EdgeButton EdgeButton--danger block-action
div id geo-disabled-dropdown
tabindex -1
class dropdown-caret
span class caret-outer
class caret-inner
li class geo-not-enabled-yet
a href http://support.twitter.com/forums/26810/entrie...
target _blank
rel noopener
button type button
class geo-turn-on EdgeButton EdgeButton--primary
type button
class geo-not-now EdgeButton EdgeButton--secondary
div id geo-enabled-dropdown
tabindex -1
class dropdown-caret
span class caret-outer
class caret-inner
div class geo-query-location
input class GeoSearch-queryInput
type text
autocomplete off
placeholder Search for a neighborhood or city
span class Icon Icon--search
div class geo-dropdown-status
ul class GeoSearch-dropdownMenu
div id list-membership-dialog
class modal-container
class modal modal-small draggable
class modal-content
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title
div class modal-body
class list-membership-content
span class spinner lists-spinner
title Loading…
div id list-operations-dialog
class modal-container
class modal modal-medium draggable
class modal-content
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title
div class modal-body
class list-editor
class field
label class t1-label
for list-name
input id list-name
type text
class text
name name
value
div class field
label class t1-label
for list-description
textarea id list-description
name description
span class help-text
fieldset class field
legend class t1-legend
div class options
label class t1-label
for list-public-radio
input class radio
type radio
name mode
id list-public-radio
value public
checked checked
label class t1-label
for list-private-radio
input class radio
type radio
name mode
id list-private-radio
value private
div class list-editor-save
button type button
class EdgeButton EdgeButton--secondary update-list-b...
data-list-id
div id activity-popup-dialog
class modal-container
class modal draggable
class modal-content clearfix
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title
div class modal-body
class tweet-loading
class spinner-bigger
class activity-popup-dialog-content modal-tweet clea...
class loading
span class spinner-bigger
div class activity-popup-dialog-users clearfix
class activity-popup-dialog-footer
id copy-link-to-tweet-dialog
class modal-container
class modal modal-medium draggable
class modal-content
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title
div class modal-body
class copy-link-to-tweet-container
label class t1-label
p class copy-link-to-tweet-instructions
textarea class link-to-tweet-destination js-initial-focus u-dir
dir ltr
readonly readonly
div id embed-tweet-dialog
class modal-container
class modal modal-medium draggable
class modal-content
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title embed-tweet-title
class modal-title embed-video-title
div class modal-body
class embed-code-container
p class embed-tweet-instructions
a href https://dev.twitter.com/web/embedded-tweets
target _blank
rel noopener
p class embed-video-instructions
a href https://dev.twitter.com/web/embedded-tweets
target _blank
rel noopener
form class t1-form
div class embed-destination-wrapper
class embed-overlay embed-overlay-spinner
class embed-overlay-content
class embed-overlay embed-overlay-error
p class embed-overlay-content
button type button
class btn-link retry-embed
textarea class embed-destination js-initial-focus
div class embed-options
class embed-include-parent-tweet
label class t1-label
for include-parent-tweet
input type checkbox
id include-parent-tweet
class include-parent-tweet
checked checked
div class embed-include-card
label class t1-label
for include-card
input type checkbox
id include-card
class include-card
checked checked
p class embed-tweet-description
a href https://dev.twitter.com/overview/terms/agreement
rel noopener
href https://dev.twitter.com/overview/terms/policy
rel noopener
h3 class embed-preview-header
div class embed-preview
id why-this-ad-dialog
class modal-container why-this-ad-dialog
class modal modal-large draggable
class modal-content
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title why-this-ad-title
div class why-this-ad-content
class why-this-ad-spinner
class spinner-bigger
iframe id why-this-ad-frame
class hidden
aria-hidden true
scrolling auto
div id login-dialog
class LoginDialog modal-container u-textCenter
class modal modal-large draggable
class LoginDialog-content modal-content
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title
div class LoginDialog-body modal-body
class LoginDialog-bird
span class Icon Icon--bird Icon--large
div class LoginDialog-form
form action https://twitter.com/sessions
class LoginForm js-front-signin
method post
data-component dialog
data-element login
div class LoginForm-input LoginForm-username
input type text
class text-input email-input js-signin-email
name session[username_or_email]
autocomplete username
placeholder Phone, email, or username
div class LoginForm-input LoginForm-password
input type password
class text-input
name session[password]
placeholder Password
autocomplete current-password
div class LoginForm-rememberForgot
input type checkbox
value 1
name remember_me
checked checked
span class separator
a class forgot
href /account/begin_password_reset
rel noopener
input type submit
class EdgeButton EdgeButton--primary EdgeButton--med...
value Log in
type hidden
name return_to_ssl
value true
type hidden
name scribe_log
type hidden
name redirect_after_login
value /
type hidden
value 84a68ea7047b5f969683265536ec597ebdfd4bb3
name authenticity_token
type hidden
name ui_metrics
autocomplete off
script src /i/js_inst?c_name=ui_metrics
async
div class LoginDialog-footer modal-footer u-textCenter
a class LoginDialog-signupLink
href https://twitter.com/signup
rel noopener
div id signup-dialog
class SignupDialog modal-container u-textCenter
class modal modal-large draggable
class SignupDialog-content modal-content
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title
div class SignupDialog-body modal-body
class SignupDialog-icon
span class Icon Icon--bird Icon--extraLarge
h2 class SignupDialog-heading
div class SignupDialog-form
class signup SignupForm\n
a href https://twitter.com/signup
role button
class EdgeButton EdgeButton--large EdgeButton--prima...
data-component dialog
data-element signup
div class SignupDialog-footer modal-footer u-textCenter
a class SignupDialog-signinLink
href /login
rel noopener
div id sms-codes-dialog
class modal-container
class modal modal-medium draggable
class modal-content
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title
div class modal-body
table id sms_codes
cellpadding 0
cellspacing 0
td rowspan 2
colspan 3
a class js-initial-focus
target _blank
href http://support.twitter.com/articles/14226-how-...
rel noopener
div id leadgen-confirm-dialog
class modal-container
class modal draggable
class modal-content
button type button
class modal-btn modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-header
h3 class modal-title
div class modal-body
class leadgen-card-container
class media
iframe class cards2-promotion-iframe
scrolling no
frameborder 0
src
div class js-macaw-cards-iframe-container
data-card-name promotion
id auth-webview-dialog
class AuthWebViewDialog modal-container
class modal draggable
class modal-content
button type button
class modal-btn modal-close modal-close-fixed js-close
span class Icon Icon--close Icon--large
class visuallyhidden
div class modal-header
h3 class modal-title
div class modal-body
class auth-webview-view-container
class media
iframe class auth-webview-card-iframe js-initial-focus
scrolling no
frameborder 0
width 590px
height 500px
src
div id promptbird-modal-prompt
class modal-container
class modal
button type button
class modal-btn js-promptDismiss modal-close js-close
span class Icon Icon--close Icon--medium
class visuallyhidden
div class modal-content
id ui-walkthrough-dialog
class modal-container UIWalkthrough
class UIWalkthrough-clickBlocker
class modal modal-small
class UIWalkthrough-caret
class modal-content
class modal-body
class UIWalkthrough-header
span class UIWalkthrough-stepProgress
button class UIWalkthrough-skip js-close
div class UIWalkthrough-step UIWalkthrough-step--welcome
h3 class UIWalkthrough-title
span class Icon Icon--home UIWalkthrough-icon
p class UIWalkthrough-message
div class UIWalkthrough-step UIWalkthrough-step--unfollow
h3 class UIWalkthrough-title
span class Icon Icon--smileRating1Fill UIWalkthrough-icon
p class UIWalkthrough-message
div class UIWalkthrough-step UIWalkthrough-step--like
h3 class UIWalkthrough-title
span class Icon Icon--heart UIWalkthrough-icon
p class UIWalkthrough-message
div class UIWalkthrough-step UIWalkthrough-step--retweet
h3 class UIWalkthrough-title
span class Icon Icon--retweet UIWalkthrough-icon
p class UIWalkthrough-message
div class UIWalkthrough-step UIWalkthrough-step--reply
h3 class UIWalkthrough-title
span class Icon Icon--reply UIWalkthrough-icon
p class UIWalkthrough-message
div class UIWalkthrough-step UIWalkthrough-step--trends
h3 class UIWalkthrough-title
span class Icon Icon--discover UIWalkthrough-icon
p class UIWalkthrough-message
div class UIWalkthrough-step UIWalkthrough-step--wtf
h3 class UIWalkthrough-title
span class Icon Icon--follow UIWalkthrough-icon
p class UIWalkthrough-message
div class UIWalkthrough-step UIWalkthrough-step--search
h3 class UIWalkthrough-title
span class Icon Icon--search UIWalkthrough-icon
p class UIWalkthrough-message
div class UIWalkthrough-step UIWalkthrough-step--moments
h3 class UIWalkthrough-title
span class Icon Icon--lightning UIWalkthrough-icon
p class UIWalkthrough-message
div class modal-footer
button class EdgeButton EdgeButton--tertiary u-floatLeft pl...
class EdgeButton EdgeButton--secondary UIWalkthrough...
div id create-custom-timeline-dialog
class modal-container
id edit-custom-timeline-dialog
class modal-container
id curate-dialog
class modal-container
id media-edit-dialog
class modal-container
class PermalinkOverlay PermalinkOverlay-with-backgro...
id permalink-overlay
class PermalinkProfile-dismiss modal-close-fixed
span class Icon Icon--close
button class PermalinkOverlay-next PermalinkOverlay-button ...
type button
span class Icon Icon--caretLeft Icon--large
class u-hiddenVisually
div class PermalinkOverlay-modal
class PermalinkOverlay-spinnerContainer u-hidden
class PermalinkOverlay-spinner
class PermalinkOverlay-content
class PermalinkOverlay-body
class hidden
id hidden-content
iframe aria-hidden true
class tweet-post-iframe
name tweet-post-iframe
aria-hidden true
class dm-post-iframe
name dm-post-iframe
input type hidden
id init-data
class json-data
value {"keyboardShortcuts":[{"name":"Actions","descr...
type hidden
class swift-boot-module
value app/pages/static_lohp
type hidden
id swift-module-path
value https://abs.twimg.com/k/swift/en
script src https://abs.twimg.com/k/en/init.en.3e84071c3bc...
async

Analyze GPX Data Recorded During A Flight from Fort Lauderdale to New Orleans

Currently editing on the plane.

GPX data recorded during an early morning Southwest Flight from Fort Lauderdale to New Orleans

Interact with this notebook on Binder .

Resources

Load the data from a Minio instance I have deployed.

In [144]:
import urllib.request
import itertools as it
from pprint import pprint
from functools import partial, reduce
import operator as op

# Define configured pprint suitable for notebooks
pprint_ = partial(pprint, indent=4)


def dhead(d: dict, n=5):
    """Return the first n items from a dictionary."""
    return {k: v for k, v in it.islice(d.items(), 0, n)}


with urllib.request.urlopen(
    "https://minio.apps.selfip.com/mymedia/gpx/fort_lauderdale__to__new_orleans.gpx"
) as res:
    data = res.read()

print(data.splitlines()[:10])
[b'<?xml version="1.0" encoding="UTF-8" standalone="no" ?>', b'<gpx xmlns="http://www.topografix.com/GPX/1/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:gpx_style="http://www.topografix.com/GPX/gpx_style/0/2" xsi:schemaLocation="http://www.topografix.com/GPX/1/1 http://www.topografix.com/GPX/1/1/gpx.xsd http://www.topografix.com/GPX/gpx_style/0/2 http://www.topografix.com/GPX/gpx_style/0/2/gpx_style.xsd" version="1.1" creator="Map Plus 2.8.7.1">', b'  <metadata>', b'    <link href="http://www.duweis.com">', b'      <text>Map Plus</text>', b'    </link>', b'    <time>2019-10-17T12:33:03Z</time>', b'  </metadata>', b'', b'  <trk>']

Parse the GPX file

In [145]:
from lxml import etree
In [146]:
tree = etree.fromstring(data, etree.XMLParser())

Display set of tags

In [147]:
{element.tag for element in tree.iter()}
Out[147]:
{'{http://www.topografix.com/GPX/1/1}cmt',
 '{http://www.topografix.com/GPX/1/1}ele',
 '{http://www.topografix.com/GPX/1/1}extensions',
 '{http://www.topografix.com/GPX/1/1}gpx',
 '{http://www.topografix.com/GPX/1/1}link',
 '{http://www.topografix.com/GPX/1/1}metadata',
 '{http://www.topografix.com/GPX/1/1}name',
 '{http://www.topografix.com/GPX/1/1}text',
 '{http://www.topografix.com/GPX/1/1}time',
 '{http://www.topografix.com/GPX/1/1}trk',
 '{http://www.topografix.com/GPX/1/1}trkpt',
 '{http://www.topografix.com/GPX/1/1}trkseg',
 '{http://www.topografix.com/GPX/gpx_style/0/2}color',
 '{http://www.topografix.com/GPX/gpx_style/0/2}line',
 '{http://www.topografix.com/GPX/gpx_style/0/2}width'}
In [149]:
set(tree.iterchildren()) == set(tree.iter())
Out[149]:
False
In [153]:
meta, trk, = tree.iterchildren()
In [160]:
*_, trkseg = trk.iterchildren()
In [162]:
data_points = list(trkseg.iter())
In [166]:
tags = {item.tag for item in data_points}

There is 1 trkseg element. It may be the root of all the location points.

In [171]:
[(tag, len([element for element in data_points if element.tag == tag])) for tag in tags]
Out[171]:
[('{http://www.topografix.com/GPX/1/1}ele', 1824),
 ('{http://www.topografix.com/GPX/1/1}time', 1829),
 ('{http://www.topografix.com/GPX/1/1}trkpt', 1829),
 ('{http://www.topografix.com/GPX/1/1}trkseg', 1)]
In [211]:
trkpnt_children = list(trkseg.iterchildren())
In [226]:
from collections import namedtuple
In [244]:
TrackPoint = namedtuple('TrackPoint', ('coordinate', 'ele', 'time'))
In [247]:
trkpnts_ = (
    ((element.attrib,), tuple(e.text for e in element.iterdescendants()))
    for element in trkpnt_children
)

trkpnts = [tuple(it.chain(*item)) for item in trkpnts_]

Not all items have a all three of ('coordinate', 'ele', 'time')

In [248]:
{len(items) for items in trkpnts}
Out[248]:
{2, 3}

See what is missing in those with only 2 parts.

In [251]:
[items for items in trkpnts if len(items) == 2]
Out[251]:
[({'lat': '26.07364077867658', 'lon': '-80.13974719286466'},
  '2019-10-17T10:32:23Z'),
 ({'lat': '26.07330806773481', 'lon': '-80.13861468216476'},
  '2019-10-17T10:32:44Z'),
 ({'lat': '26.07329140328246', 'lon': '-80.13861683228613'},
  '2019-10-17T10:33:09Z'),
 ({'lat': '26.07358034244973', 'lon': '-80.13865072072218'},
  '2019-10-17T10:34:37Z'),
 ({'lat': '26.07370411580322', 'lon': '-80.14069088505309'},
  '2019-10-17T10:34:46Z')]
In [252]:
[items for items in trkpnts if len(items) == 3][:5]
Out[252]:
[({'lat': '26.07408333333334', 'lon': '-80.136275'},
  '14',
  '2019-10-17T10:16:44Z'),
 ({'lat': '26.07371', 'lon': '-80.13643666666667'},
  '5.8',
  '2019-10-17T10:16:52Z'),
 ({'lat': '26.07379666666666', 'lon': '-80.13646999999999'},
  '1.1',
  '2019-10-17T10:17:24Z'),
 ({'lat': '26.07390333333334', 'lon': '-80.13640000000001'},
  '5.3',
  '2019-10-17T10:17:47Z'),
 ({'lat': '26.07400833333334', 'lon': '-80.13633'},
  '5.9',
  '2019-10-17T10:18:32Z')]

Rewrite the comprehensions to account for a lack of ele in a trkpnt.

In [ ]:
def trkpnt_handler(items):
    """Insert a None if there is no ele data point."""
    # Item at index 1 should be a digit.
    try:
        ele = float(items)
In [247]:
trkpnts_ = (
    ((element.attrib,), tuple(e.text for e in element.iterdescendants()))
    for element in trkpnt_children
)

trkpnts = [tuple(it.chain(*item)) for item in trkpnts_]

Begin copied cells below from another post.

Practice laziness in the sense of one of the Three Virtues: laziness, impatience, hubris

Issues

  1. I don't like having to retype strings that are dict keys. It's error-prone and taxes my memory. I would prefer a variable that I didn't have to manually define.
    Use Enum to create variables programatically. A plain dict would probably work, too. I like the way that an Enum is represented in output and it's type feature. And I am trying to find use cases for an Enum.
In [99]:
example = dict(FOO="foo")
globals().update(example)
FOO, example
Out[99]:
('foo', {'FOO': 'foo'})

Walk the data structure to get all the keys.

I wrote this function as an inspiration from the Stack Overflow question Access nested dictionary items via a list of keys?

In [30]:
def paths_in_data(data: dict, parent=()):
    """Calculate keys and/or indices in a nested dict."""

    if not any(isinstance(data, type_) for type_ in (dict, list, tuple)):
        return (parent,)
    else:
        try:  # Handle dict
            return reduce(
                op.add,
                (paths_in_data(v, op.add(parent, (k,))) for k, v in data.items()),
                (),
            )
        except AttributeError:  # Handle indexable sequences.
            return reduce(
                op.add,
                (paths_in_data(v, op.add(parent, (data.index(v),))) for v in data),
                (),
            )

Truncated example of the paths generated from paths_in_data.

In [32]:
[path for path in it.takewhile(lambda x: x[-1] != 2, paths_in_data(data))]
Out[32]:
[('type',),
 ('crs', 'type'),
 ('crs', 'properties', 'name'),
 ('features', 0, 'type'),
 ('features', 0, 'properties', 'GUID'),
 ('features', 0, 'properties', 'LABEL_EXPR'),
 ('features', 0, 'properties', 'TITLE'),
 ('features', 0, 'properties', 'LABEL_TEXT'),
 ('features', 0, 'properties', 'NOTES'),
 ('features', 0, 'geometry', 'type'),
 ('features', 0, 'geometry', 'coordinates', 0, 0),
 ('features', 0, 'geometry', 'coordinates', 0, 1)]

Get a set of all the keys.

In [44]:
data_key_set = sorted(
    {key for key in it.chain.from_iterable(paths_in_data(data)) if isinstance(key, str)}
)
_print(data_key_set)
[   'GUID',
    'LABEL_EXPR',
    'LABEL_TEXT',
    'NOTES',
    'TITLE',
    'coordinates',
    'crs',
    'features',
    'geometry',
    'name',
    'properties',
    'type']

Cast data_key_set into valid variable names

In [47]:
from string import digits, whitespace, punctuation

# Transform all whitespace and punctuation into underscores
# Not needed but left here as an example
translation = str.maketrans(dict(zip((*whitespace, *punctuation), it.cycle("_"))))

data_key_set_names = [
    key.translate(translation).strip(digits).upper() for key in data_key_set
]
_print(data_key_set_names)
[   'GUID',
    'LABEL_EXPR',
    'LABEL_TEXT',
    'NOTES',
    'TITLE',
    'COORDINATES',
    'CRS',
    'FEATURES',
    'GEOMETRY',
    'NAME',
    'PROPERTIES',
    'TYPE']

Define an Enum using the functional API.

In [50]:
from enum import Enum

DataKeys = Enum("DataKeys", type=str, names=zip(data_key_set_names, data_key_set))
_print(DataKeys.__members__)
mappingproxy({   'COORDINATES': <DataKeys.COORDINATES: 'coordinates'>,
                 'CRS': <DataKeys.CRS: 'crs'>,
                 'FEATURES': <DataKeys.FEATURES: 'features'>,
                 'GEOMETRY': <DataKeys.GEOMETRY: 'geometry'>,
                 'GUID': <DataKeys.GUID: 'GUID'>,
                 'LABEL_EXPR': <DataKeys.LABEL_EXPR: 'LABEL_EXPR'>,
                 'LABEL_TEXT': <DataKeys.LABEL_TEXT: 'LABEL_TEXT'>,
                 'NAME': <DataKeys.NAME: 'name'>,
                 'NOTES': <DataKeys.NOTES: 'NOTES'>,
                 'PROPERTIES': <DataKeys.PROPERTIES: 'properties'>,
                 'TITLE': <DataKeys.TITLE: 'TITLE'>,
                 'TYPE': <DataKeys.TYPE: 'type'>})

Add names from DataKeys to global namespace.

In [51]:
globals().update(DataKeys.__members__)

Inspect a variable

In [70]:
_print((FEATURES, type(FEATURES), isinstance(FEATURES, str)))
(<DataKeys.FEATURES: 'features'>, <enum 'DataKeys'>, True)

Get some specific data

In [71]:
def get_from(data, path):
    """Get a leaf from iterable of keys and/or indices.
    
    :data: Collection where nodes are either a dict or list.
    :path: Collection of keys and/or indices leading to a leaf.
    """
    return reduce(op.getitem, path, data)
In [76]:
paths = [
    (TYPE,),
    (CRS, TYPE),
    (CRS, PROPERTIES, NAME),
    (FEATURES, 0, GEOMETRY, COORDINATES, 0, 1),
]

for path in paths:
    _print(get_from(data, path))
'FeatureCollection'
'name'
'urn:ogc:def:crs:OGC:1.3:CRS84'
25.80153849443961

View in Pandas DataFrame

In [93]:
names = "lon lat ele".split()


class PandasColumn(Enum):
    """Extend Enum so that when a member is used as a Pandas data frame column its value is displayed."""

    def __str__(self):
        return self.value


CoordinateColumns = PandasColumn(
    "CoordinateColumn", type=str, names=zip((name.upper() for name in names), names)
)
globals().update(CoordinateColumns.__members__)
In [94]:
import pandas as pd

df = pd.DataFrame(
    get_from(data, (FEATURES, 0, GEOMETRY, COORDINATES)),
    columns=CoordinateColumns.__members__.values(),
)
df.head()
Out[94]:
lon lat ele
0 -80.203793 25.801538 -0.058535
1 -80.203824 25.801507 10.088560
2 -80.203784 25.801589 11.503721
3 -80.203711 25.801508 9.746153
4 -80.203605 25.801513 9.274504
In [90]:
df[LAT]
Out[90]:
0       25.801538
1       25.801507
2       25.801589
3       25.801508
4       25.801513
          ...    
1102    26.119918
1103    26.119874
1104    26.119792
1105    26.119739
1106    26.119739
Name: CoordinateColumn.LAT, Length: 1107, dtype: float64