Call Python Code from Node.js

Interpret Python Code from Node.js with npm package PyNode.

Resources

In [15]:
//Use of var here to avoid having to reload the Jupyter Notebook on every run of a cell.
var fs = require('fs');
In [16]:
//Python code
var pythonCode = `
from string import ascii_uppercase

def add(x, y):
    return x + y

def uppercase():
    return list(ascii_uppercase)

`

Write the Python code into a file inside the working directory.

In [17]:
fs.writeFileSync('main.py', pythonCode)

Load the PyNode library.

In [18]:
var pynode = require('@fridgerator/pynode')
In [19]:
// optionally pass a path to use as Python module search path
pynode.startInterpreter()
In [20]:
// add current path as Python module search path, so it finds our app.py
pynode.appendSysPath('./')
In [21]:
// open the python file (module)
pynode.openFile('main')
In [22]:
// call the python function and get a return value
pynode.call('add', 1, 2, (err, result) => {
  if (err) return console.log('error : ', err)
  console.log({result: result === 3}); // true
})
{ result: true }
In [23]:
// call the python function and get a return value
pynode.call('uppercase', (err, result) => {
  if (err) return console.log('error : ', err)
  const uppercase = String.fromCharCode(...[...Array(26).keys()].map(i => i + 0b01000001));
  console.table(uppercase);
  console.log({result: result.every((letter, i) => letter === uppercase[i])}); //true
})
ABCDEFGHIJKLMNOPQRSTUVWXYZ
{ result: true }

My First Go Lang Function: Walk a Folder

My first code in GoLang.

In [1]:
import (
    "fmt"
    "os"
    "path/filepath"
)

func main() {
    var files []string

    root := "/tmp/.com.google.Chrome.5i0BtB"
    err := filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
        files = append(files, path)
        return nil
    })
    if err != nil {
        panic(err)
    }
    for _, file := range files {
        fmt.Println(file)
    }
}
In [2]:
main()
/tmp/.com.google.Chrome.5i0BtB
/tmp/.com.google.Chrome.5i0BtB/Default
/tmp/.com.google.Chrome.5i0BtB/Default/Code Cache
/tmp/.com.google.Chrome.5i0BtB/Default/Code Cache/js
/tmp/.com.google.Chrome.5i0BtB/Default/Code Cache/js/index
/tmp/.com.google.Chrome.5i0BtB/Default/Code Cache/js/index-dir
/tmp/.com.google.Chrome.5i0BtB/Default/Code Cache/js/index-dir/the-real-index
/tmp/.com.google.Chrome.5i0BtB/Default/GPUCache
/tmp/.com.google.Chrome.5i0BtB/Default/GPUCache/data_0
/tmp/.com.google.Chrome.5i0BtB/Default/GPUCache/data_1
/tmp/.com.google.Chrome.5i0BtB/Default/GPUCache/data_2
/tmp/.com.google.Chrome.5i0BtB/Default/GPUCache/data_3
/tmp/.com.google.Chrome.5i0BtB/Default/GPUCache/index
/tmp/.com.google.Chrome.5i0BtB/Default/Preferences
/tmp/.com.google.Chrome.5i0BtB/Default/blob_storage
/tmp/.com.google.Chrome.5i0BtB/Default/blob_storage/038e823c-5176-4dec-8e3d-52469e731bc5
/tmp/.com.google.Chrome.5i0BtB/Default/chrome_debug.log
/tmp/.com.google.Chrome.5i0BtB/DevToolsActivePort
/tmp/.com.google.Chrome.5i0BtB/First Run
/tmp/.com.google.Chrome.5i0BtB/Local State

Re-establish Communication a with Selenium Browser Instance after the Script that Started it Has Exited

Currently editing.

Still getting to the point of re-establishing communication.

Find the chromedriver

In [1]:
import shutil

CHROMEDRIVER = "chromedriver"
chrome_driver = shutil.which(CHROMEDRIVER)
assert chrome_driver, f"Could not find executable '{CHROMEDRIVER}'."
chrome_driver
Out[1]:
'/usr/local/bin/chromedriver'

Create a selenium.webdriver.chrome.service.Service instance.

In [2]:
import selenium.webdriver.chrome.service as selservice

service = selservice.Service(chrome_driver)

My first attempt at using the python3.8 "walrus operator" in a comprehension.

In [3]:
exclusions = ("_", "stop", "start", "send", )
methods = [
    method
    for attr in dir(service)
    if not any(attr.startswith(exclusion) for exclusion in exclusions)
    and callable(method := getattr(service, attr))
]
methods
Out[3]:
[<bound method Service.assert_process_still_running of <selenium.webdriver.chrome.service.Service object at 0x7f61c45c9eb0>>,
 <bound method Service.command_line_args of <selenium.webdriver.chrome.service.Service object at 0x7f61c45c9eb0>>,
 <bound method Service.is_connectable of <selenium.webdriver.chrome.service.Service object at 0x7f61c45c9eb0>>]

Sans assigment expression aka "walrus operator" the code might look like this.

In [4]:
exclusions = ("_", "stop", "start", "send")
values = ( 
    getattr(service, attr)
    for attr in dir(service)
    if not any(attr.startswith(exclusion) for exclusion in exclusions)
    # WET and inefficient to then say 'and callable(getattr(service, attr))' here
)
methods = [value for value in values if callable(value)]
print(methods)

def output_method_calls():
    for method in methods:
        try:
            print({method.__name__: method()})
        except AttributeError:
            print(f"'{method.__name__}' failed")
[<bound method Service.assert_process_still_running of <selenium.webdriver.chrome.service.Service object at 0x7f61c45c9eb0>>, <bound method Service.command_line_args of <selenium.webdriver.chrome.service.Service object at 0x7f61c45c9eb0>>, <bound method Service.is_connectable of <selenium.webdriver.chrome.service.Service object at 0x7f61c45c9eb0>>]

Start the service.

In [5]:
service.start()

Call the methods.

In [6]:
output_method_calls()
{'assert_process_still_running': None}
{'command_line_args': ['--port=52503']}
{'is_connectable': True}
In [7]:
!pgrep chrome
24242

Get a driver aka "browser".

In [8]:
from selenium import webdriver
from IPython.display import display, Image

capabilities = {"chrome.binary": chrome_driver}
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument("--headless")
driver = webdriver.Remote(
    service.service_url, desired_capabilities=capabilities, options=chrome_options
)
driver.get("http://example.com")
screenshot_path = "screenshot.png"
driver.save_screenshot(screenshot_path)
display(Image(screenshot_path))

Check for chromium processes.

They do not automatically stop.

In [9]:
!pgrep chrome
24242
24396
24405
24407
24422
24423
24440
24449

Save meta info about service so that re-connection can be established.

I discovered via pytesting that services and drivers created inside of a function die automatically after the function is called.

If the services and drivers exist in the module then they tend to persist after the script exits.

The chrome processes persist when this code is run in a script in the terminal.

selresus is a Python library I wrote to wrap the services and drivers needed to accomplish persisting chrome instances with which re-communication can be established.

In [3]:
"""Start a Chrome browser."""
from time import sleep
from selresus.serviceutils import get_service
from selresus.driverutils import get_headless_driver, get_headed_driver


def main():
    """If the drivers are created here and not returned then they are closed.
    They must exist inside of __main__ or they are closed. How does that happen?
    """
    service = get_service()
    service.start()
    driver = get_headless_driver(service)
    driver.get("http://example.com")
    return service, driver


if __name__ == "__main__":
    service, driver = main()
    assert all(item in driver.page_source for item in ("<html", "Example"))

The chrome processes do not persist when this code is run in a script in the terminal.

In [20]:
"""Start a Chrome browser."""
from time import sleep
from selresus.serviceutils import get_service
from selresus.driverutils import get_headless_driver, get_headed_driver


def main():
    """If the drivers are created here and not returned then they are closed.
    They must exist inside of __main__ or they are closed. How does that happen?
    """
    service = get_service()
    service.start()
    driver = get_headless_driver(service)
    driver.get("http://example.com")
    assert all(item in driver.page_source for item in ("<html", "Example"))

    # return service, driver


if __name__ == "__main__":
    main()

Define a JavaScript function called max using Array.prototype.reduce

Define a function called max that calculates the max number using Array.prototype.reduce.

In [2]:
function max(...args){
    return args.reduce((acc, x) => acc > x ? acc : x, -Infinity)
}

Generate some random data to test against Math.max.

In [3]:
function range(start, stop, step=1){
    if(step < 1) throw new Error('step must be 1 or greater.');
    if(stop <= start) throw new Error('stop must be larger than start');
    return Array.from({length: ((stop - start) / step)}, (_, i) => start + (i * step));
}
In [4]:
function getRandomInt(min, max) {
    return Math.floor((Math.random() * (max - min)) + min);
}
In [5]:
const [START, STOP] = [-1000, 1000],
    test_data = range(0, 10)
        .map(() => range(0, getRandomInt(1, 20))
             .map(() => getRandomInt(START, STOP)));
console.log(test_data.slice(0, 5));
[
  [
     591,  992,  191, -305,
    -428,  518,  541, -114,
    -537, -486, -163, -629,
    -489, -599, -484,  -73
  ],
  [ 958, -381, -799, 536, -939, 721 ],
  [ -149 ],
  [ -284 ],
  [ -989, 370, -831, -404, 204 ]
]

Verify that the max value was found in each array using Math.max as the standard.

In [6]:
test_data.every(array => max(...array) === Math.max(...array));
Out[6]:
true

Analyze GeoJSON Recorded During a Commute from Miami to Fort Lauderdale

GeoJSON recorded during an evening commute from Miami to Fort Lauderdale

Interact with this notebook on Binder .

Resources

Load the data from a Minio instance I have deployed.

In [58]:
import urllib.request
import json
import itertools as it
from pprint import pprint
from functools import partial, reduce
import operator as op

# Define configured pprint suitable for notebooks
_print = partial(pprint, indent=4)


def dhead(d: dict, n=5):
    """Return the first n items from a dictionary."""
    return {k: v for k, v in it.islice(d.items(), 0, n)}


with urllib.request.urlopen(
    "https://minio.apps.selfip.com/mymedia/geojson/MIA-to-FLL-TriRail-2019-10-08.geojson"
) as res:
    data = json.load(res)

for n in range(1, len(data)):
    _print(dhead(data, n))
{'type': 'FeatureCollection'}
{   'crs': {   'properties': {'name': 'urn:ogc:def:crs:OGC:1.3:CRS84'},
               'type': 'name'},
    'type': 'FeatureCollection'}

Practice laziness in the sense of one of the Three Virtues

Issues

  1. I don't like having to retype strings that are dict keys. It's error-prone and taxes my memory. I would prefer a variable that I didn't have to manually define.
    Use Enum to create variables programatically. A plain dict would probably work, too. I like the way that an Enum is represented in output and it's type feature. And I am trying to find use cases for an Enum.
In [99]:
example = dict(FOO="foo")
globals().update(example)
FOO, example
Out[99]:
('foo', {'FOO': 'foo'})

Walk the data structure to get all the keys.

I wrote this function as an inspiration from the Stack Overflow question Access nested dictionary items via a list of keys?

In [30]:
def paths_in_data(data: dict, parent=()):
    """Calculate keys and/or indices in a nested dict."""

    if not any(isinstance(data, type_) for type_ in (dict, list, tuple)):
        return (parent,)
    else:
        try:  # Handle dict
            return reduce(
                op.add,
                (paths_in_data(v, op.add(parent, (k,))) for k, v in data.items()),
                (),
            )
        except AttributeError:  # Handle indexable sequences.
            return reduce(
                op.add,
                (paths_in_data(v, op.add(parent, (data.index(v),))) for v in data),
                (),
            )

Truncated example of the paths generated from paths_in_data.

In [32]:
[path for path in it.takewhile(lambda x: x[-1] != 2, paths_in_data(data))]
Out[32]:
[('type',),
 ('crs', 'type'),
 ('crs', 'properties', 'name'),
 ('features', 0, 'type'),
 ('features', 0, 'properties', 'GUID'),
 ('features', 0, 'properties', 'LABEL_EXPR'),
 ('features', 0, 'properties', 'TITLE'),
 ('features', 0, 'properties', 'LABEL_TEXT'),
 ('features', 0, 'properties', 'NOTES'),
 ('features', 0, 'geometry', 'type'),
 ('features', 0, 'geometry', 'coordinates', 0, 0),
 ('features', 0, 'geometry', 'coordinates', 0, 1)]

Get a set of all the keys.

In [44]:
data_key_set = sorted(
    {key for key in it.chain.from_iterable(paths_in_data(data)) if isinstance(key, str)}
)
_print(data_key_set)
[   'GUID',
    'LABEL_EXPR',
    'LABEL_TEXT',
    'NOTES',
    'TITLE',
    'coordinates',
    'crs',
    'features',
    'geometry',
    'name',
    'properties',
    'type']

Cast data_key_set into valid variable names

In [47]:
from string import digits, whitespace, punctuation

# Transform all whitespace and punctuation into underscores
# Not needed but left here as an example
translation = str.maketrans(dict(zip((*whitespace, *punctuation), it.cycle("_"))))

data_key_set_names = [
    key.translate(translation).strip(digits).upper() for key in data_key_set
]
_print(data_key_set_names)
[   'GUID',
    'LABEL_EXPR',
    'LABEL_TEXT',
    'NOTES',
    'TITLE',
    'COORDINATES',
    'CRS',
    'FEATURES',
    'GEOMETRY',
    'NAME',
    'PROPERTIES',
    'TYPE']

Define an Enum using the functional API.

In [50]:
from enum import Enum

DataKeys = Enum("DataKeys", type=str, names=zip(data_key_set_names, data_key_set))
_print(DataKeys.__members__)
mappingproxy({   'COORDINATES': <DataKeys.COORDINATES: 'coordinates'>,
                 'CRS': <DataKeys.CRS: 'crs'>,
                 'FEATURES': <DataKeys.FEATURES: 'features'>,
                 'GEOMETRY': <DataKeys.GEOMETRY: 'geometry'>,
                 'GUID': <DataKeys.GUID: 'GUID'>,
                 'LABEL_EXPR': <DataKeys.LABEL_EXPR: 'LABEL_EXPR'>,
                 'LABEL_TEXT': <DataKeys.LABEL_TEXT: 'LABEL_TEXT'>,
                 'NAME': <DataKeys.NAME: 'name'>,
                 'NOTES': <DataKeys.NOTES: 'NOTES'>,
                 'PROPERTIES': <DataKeys.PROPERTIES: 'properties'>,
                 'TITLE': <DataKeys.TITLE: 'TITLE'>,
                 'TYPE': <DataKeys.TYPE: 'type'>})

Add names from DataKeys to global namespace.

In [51]:
globals().update(DataKeys.__members__)

Inspect a variable

In [70]:
_print((FEATURES, type(FEATURES), isinstance(FEATURES, str)))
(<DataKeys.FEATURES: 'features'>, <enum 'DataKeys'>, True)

Get some specific data

In [71]:
def get_from(data, path):
    """Get a leaf from iterable of keys and/or indices.
    
    :data: Collection where nodes are either a dict or list.
    :path: Collection of keys and/or indices leading to a leaf.
    """
    return reduce(op.getitem, path, data)
In [76]:
paths = [
    (TYPE,),
    (CRS, TYPE),
    (CRS, PROPERTIES, NAME),
    (FEATURES, 0, GEOMETRY, COORDINATES, 0, 1),
]

for path in paths:
    _print(get_from(data, path))
'FeatureCollection'
'name'
'urn:ogc:def:crs:OGC:1.3:CRS84'
25.80153849443961

View in Pandas DataFrame

In [93]:
names = "lon lat ele".split()


class PandasColumn(Enum):
    """Extend Enum so that when a member is used as a Pandas data frame column its value is displayed."""

    def __str__(self):
        return self.value


CoordinateColumns = PandasColumn(
    "CoordinateColumn", type=str, names=zip((name.upper() for name in names), names)
)
globals().update(CoordinateColumns.__members__)
In [94]:
import pandas as pd

df = pd.DataFrame(
    get_from(data, (FEATURES, 0, GEOMETRY, COORDINATES)),
    columns=CoordinateColumns.__members__.values(),
)
df.head()
Out[94]:
lon lat ele
0 -80.203793 25.801538 -0.058535
1 -80.203824 25.801507 10.088560
2 -80.203784 25.801589 11.503721
3 -80.203711 25.801508 9.746153
4 -80.203605 25.801513 9.274504
In [90]:
df[LAT]
Out[90]:
0       25.801538
1       25.801507
2       25.801589
3       25.801508
4       25.801513
          ...    
1102    26.119918
1103    26.119874
1104    26.119792
1105    26.119739
1106    26.119739
Name: CoordinateColumn.LAT, Length: 1107, dtype: float64

Conclusions

I was hoping that there would be some time information in the GeoJSON data.

After exploring the export options in Map Plus, I discovered an XML format that includes times. This will be more interesting.

The GeoJSON is adequate for longitude, latitude and elevation data. TODO: Display GeoJSON data in a Jupyter notebook.

GPX formatted data for same trip.

<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<gpx xmlns="http://www.topografix.com/GPX/1/1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:gpx_style="http://www.topografix.com/GPX/gpx_style/0/2" xsi:schemaLocation="http://www.topografix.com/GPX/1/1 http://www.topografix.com/GPX/1/1/gpx.xsd http://www.topografix.com/GPX/gpx_style/0/2 http://www.topografix.com/GPX/gpx_style/0/2/gpx_style.xsd" version="1.1" creator="Map Plus 2.8.6.2">
  <metadata>
    <link href="http://www.duweis.com">
      <text>Map Plus</text>
    </link>
    <time>2019-10-09T15:18:41Z</time>
  </metadata>

  <trk>
    <name>10/8/19</name>
    <cmt>50 km, 1 h 29 min</cmt>
    <extensions>
      <gpx_style:line>
        <gpx_style:color>ff7a00</gpx_style:color>
        <gpx_style:width>4000</gpx_style:width>
      </gpx_style:line>
    </extensions>
    <trkseg>
      <trkpt lat="25.80153849443961" lon="-80.20379332833011">
        <ele>-0.05853462</ele>
        <time>2019-10-09T00:55:50Z</time>
      </trkpt>
      <trkpt lat="25.80150727185029" lon="-80.20382425755281">
        <ele>10.08856</ele>
        <time>2019-10-09T00:55:54Z</time>
      </trkpt>

Eliminating Loops: A Number Guessing Game in Python

Interact with this notebook on Binder here.

Just for fun, let us take a quick look at how we could take out all loops from any Python program. Most of the time this is a bad idea, both for readability and performance, but it is worth looking at how simple it is to do in a systematic fashion as background to contemplate those cases where it is actually a good idea.

Excerpt From: David Mertz. Functional Programming in Python

In [2]:
from IPython.display import display, Image
In [3]:
display(
    Image(
        url="https://minio.apps.selfip.com/mymedia/screenshots/functional_programming_in_python.png"
    )
)

Basic REPL

In [4]:
def identity(item):
    print(f"output: {item}")
    return item


echo = lambda: identity(input("Type something:\r")) == "quit" or echo()
echo()
Type something:
Hello,
output: Hello,
Type something:
world.
output: world.
Type something:
quit
output: quit
Out[4]:
True
In [5]:
import random

EXIT_WORD = "quit"
EXIT_WORDS = {EXIT_WORD, "q", "bye", "exit"}
ANSWER = random.randrange(1, 11)
EXIT_WORDS.update([str(ANSWER)])


def get_verification(guess):
    guess_int = int(guess)
    if guess_int == ANSWER:
        return EXIT_WORD
    return "higher" if guess_int < ANSWER else "lower"


def identity(item):
    verification = hint = get_verification(item)
    if verification != EXIT_WORD:
        print(f'your guess: "{item}" should be {hint}')

    return verification


echo = lambda: identity(input("Type something:\r")) in EXIT_WORDS or echo()
echo()
Type something:
1
your guess: "1" should be higher
Type something:
5
your guess: "5" should be lower
Type something:
3
your guess: "3" should be lower
Type something:
2
Out[5]:
True

Make the game more complex. It now has state.

In [6]:
import random

EXIT_WORDS = {"quit", "q", "bye", "exit"}
START, STOP = 1, 11
END = STOP - 1
ANSWER = random.randrange(START, STOP)
EXIT_WORDS.update([str(ANSWER)])

GUESSES = 3
counter = 0


def count():
    global counter
    counter += 1


def get_message(guess):
    if guess in EXIT_WORDS:
        return guess
    guess_int = int(guess)
    if guess_int == ANSWER:
        return f'The answer "{guess_int}" is correct.'
    hint = "higher" if guess_int < ANSWER else "lower"
    return f'Guess {hint} than "{guess}".'


def identity(item):
    print(get_message(item))
    count()
    return item


echo = (
    lambda: any(
        (
            identity(input(f"Type a number between {START} and {END}:\r"))
            in EXIT_WORDS,
            counter > GUESSES,
        )
    )
    or echo()
)
echo()
Type a number between 1 and 10:
1
Guess higher than "1".
Type a number between 1 and 10:
5
Guess higher than "5".
Type a number between 1 and 10:
7
Guess higher than "7".
Type a number between 1 and 10:
10
Guess lower than "10".
Out[6]:
True

Refactor to handle malformed input.

Add a try, except where the guess is cast to an int.

In [7]:
import random

EXIT_WORDS = {"quit", "q", "bye", "exit"}
START, STOP = 1, 11
END = STOP - 1
ANSWER = random.randrange(START, STOP)
EXIT_WORDS.update([str(ANSWER)])

MAX_GUESSES = 3
counter = 0


def count():
    global counter
    counter += 1


def get_message(guess):
    if guess in EXIT_WORDS:
        return guess
    try:
        guess_int = int(guess)
    except ValueError:
        return f'"guess" is not valid input.'
    if guess_int == ANSWER:
        return f'The answer "{guess_int}" is correct.'
    hint = "higher" if guess_int < ANSWER else "lower"
    return f'Guess {hint} than "{guess}".'


def identity(item):
    print(get_message(item))
    count()
    return item


echo = (
    lambda: any(
        (
            identity(input(f"Type a number between {START} and {END}:\r"))
            in EXIT_WORDS,
            counter >= MAX_GUESSES,
        )
    )
    or echo()
)
echo()
if counter >= GUESSES:
    print(f'Maximum guesses of "{MAX_GUESSES}" exceeded.')
f"The answer is {ANSWER}"
Type a number between 1 and 10:
cat
"guess" is not valid input.
Type a number between 1 and 10:
1
Guess higher than "1".
Type a number between 1 and 10:
10
Guess lower than "10".
Maximum guesses of "3" exceeded.
Out[7]:
'The answer is 7'

Refactor to remove the lambda.

Add a try, except where the guess is cast to an int.

In [8]:
import random

EXIT_WORDS = {"quit", "q", "bye", "exit"}
START, STOP = 1, 11
END = STOP - 1
ANSWER = random.randrange(START, STOP)
EXIT_WORDS.update([str(ANSWER)])

MAX_GUESSES = 3
counter = 0


def count():
    global counter
    counter += 1


def get_message(guess):
    if guess in EXIT_WORDS:
        return guess
    try:
        guess_int = int(guess)
    except ValueError:
        return f'"guess" is not valid input.'
    if guess_int == ANSWER:
        return f'The answer "{guess_int}" is correct.'
    hint = "higher" if guess_int < ANSWER else "lower"
    return f'Guess {hint} than "{guess}".'


def identity(item):
    print(get_message(item))
    count()
    return item


def echo():
    return (
        any(
            (
                identity(input(f"Type a number between {START} and {END}:\r"))
                in EXIT_WORDS,
                counter >= MAX_GUESSES,
            )
        )
        or echo()
    )


echo()
if counter >= GUESSES:
    print(f'Maximum guesses of "{MAX_GUESSES}" exceeded.')
f"The answer is {ANSWER}"
Type a number between 1 and 10:
1
Guess higher than "1".
Type a number between 1 and 10:
10
Guess lower than "10".
Type a number between 1 and 10:
5
5
Maximum guesses of "3" exceeded.
Out[8]:
'The answer is 5'

A Shortcut for Creating Individual Names to Refer to Pandas Columns

Read csv file from url into Pandas Dataframe

In [19]:
from enum import Enum, unique
from string import punctuation, whitespace
import itertools as it
from pprint import pprint
In [2]:
import pandas as pd
from IPython.display import display
In [3]:
df = pd.read_csv('https://static.apps.selfip.com/codio.csv')

Create Enum items

Create a translation map to replace all whitespace and punctuation with an underscore to create a valid variable name.

In [4]:
translation = str.maketrans(dict(zip(punctuation + whitespace, it.cycle('_'))))

Look for an easier and more precise way to define column names as variables than typing them by hand.

In [5]:
df.columns
Out[5]:
Index(['student name', 'first name', 'last name', 'username', 'hashed id',
       'email', 'unit name', 'right answers', 'right count', 'wrong answers',
       'wrong count', 'answered assessments', 'points/max_points', 'points',
       'total points', 'time spent', 'grade', 'comment', 'created',
       'completed', 'completed date', 'answers', 'attempts'],
      dtype='object')

Update from the hacky way I had done this in an earlier version of this post.

The Enum class has a functional API.

In [21]:
names = ((column.translate(translation).upper(), column,) for column in df.columns)
ColumnNames = Enum('ColumnNames', names=names, type=str)
pprint(ColumnNames.__members__, indent=4)
mappingproxy(OrderedDict([   (   'STUDENT_NAME',
                                 <ColumnNames.STUDENT_NAME: 'student name'>),
                             (   'FIRST_NAME',
                                 <ColumnNames.FIRST_NAME: 'first name'>),
                             (   'LAST_NAME',
                                 <ColumnNames.LAST_NAME: 'last name'>),
                             ('USERNAME', <ColumnNames.USERNAME: 'username'>),
                             (   'HASHED_ID',
                                 <ColumnNames.HASHED_ID: 'hashed id'>),
                             ('EMAIL', <ColumnNames.EMAIL: 'email'>),
                             (   'UNIT_NAME',
                                 <ColumnNames.UNIT_NAME: 'unit name'>),
                             (   'RIGHT_ANSWERS',
                                 <ColumnNames.RIGHT_ANSWERS: 'right answers'>),
                             (   'RIGHT_COUNT',
                                 <ColumnNames.RIGHT_COUNT: 'right count'>),
                             (   'WRONG_ANSWERS',
                                 <ColumnNames.WRONG_ANSWERS: 'wrong answers'>),
                             (   'WRONG_COUNT',
                                 <ColumnNames.WRONG_COUNT: 'wrong count'>),
                             (   'ANSWERED_ASSESSMENTS',
                                 <ColumnNames.ANSWERED_ASSESSMENTS: 'answered assessments'>),
                             (   'POINTS_MAX_POINTS',
                                 <ColumnNames.POINTS_MAX_POINTS: 'points/max_points'>),
                             ('POINTS', <ColumnNames.POINTS: 'points'>),
                             (   'TOTAL_POINTS',
                                 <ColumnNames.TOTAL_POINTS: 'total points'>),
                             (   'TIME_SPENT',
                                 <ColumnNames.TIME_SPENT: 'time spent'>),
                             ('GRADE', <ColumnNames.GRADE: 'grade'>),
                             ('COMMENT', <ColumnNames.COMMENT: 'comment'>),
                             ('CREATED', <ColumnNames.CREATED: 'created'>),
                             (   'COMPLETED',
                                 <ColumnNames.COMPLETED: 'completed'>),
                             (   'COMPLETED_DATE',
                                 <ColumnNames.COMPLETED_DATE: 'completed date'>),
                             ('ANSWERS', <ColumnNames.ANSWERS: 'answers'>),
                             ('ATTEMPTS', <ColumnNames.ATTEMPTS: 'attempts'>)]))

Add members of ColumnNames to global

In [15]:
globals().update(ColumnNames.__members__)

print(repr(GRADE))
<ColumnNames.GRADE: 'grade'>

Results!

Now there are variables available to use as keys on the dataframe. Typing them by hand would have been tedious and error prone.

A dict could work here as well. I like the way that a member of the Enum is printed though. The labeling is nice.

Display the desired subset of columns.

In [16]:
df[[FIRST_NAME, GRADE, COMMENT]]
Out[16]:
first name grade comment
0 Andy NaN NaN
1 Student A 100.0 NaN
2 Student B NaN NaN
3 Andy 100.0 NaN
4 Student A 87.0 NaN
5 Andy NaN NaN

Iterate over column names.

In [17]:
for column in ColumnNames:
    display(df[column])
0        Andy Weiss
1    Student A Test
2    Student B Test
3        Andy Weiss
4    Student A Test
5        Andy Weiss
Name: student name, dtype: object
0         Andy
1    Student A
2    Student B
3         Andy
4    Student A
5         Andy
Name: first name, dtype: object
0    Weiss
1     Test
2     Test
3    Weiss
4     Test
5    Weiss
Name: last name, dtype: object
0                   aweiss6
1    stest-wyncode-academy1
2    stest-wyncode-academy2
3                   aweiss6
4    stest-wyncode-academy1
5                   aweiss6
Name: username, dtype: object
0    5b15908826b16c2fbbb6c25efcccefa17807c6b0
1    613a2940b95692a0edb208998ee82f4e5946e304
2    74dd20928d48e82a070737d2d4e3c7f40c709bc2
3    5b15908826b16c2fbbb6c25efcccefa17807c6b0
4    613a2940b95692a0edb208998ee82f4e5946e304
5    5b15908826b16c2fbbb6c25efcccefa17807c6b0
Name: hashed id, dtype: object
0               andyweiss1982@gmail.com
1    student+wyncode-academy1@codio.com
2    student+wyncode-academy2@codio.com
3               andyweiss1982@gmail.com
4    student+wyncode-academy1@codio.com
5               andyweiss1982@gmail.com
Name: email, dtype: object
0                       Fundamentals Of HTML
1    Setting up your development environment
2    Setting up your development environment
3    Setting up your development environment
4                Fundamentals Of Programming
5                Fundamentals Of Programming
Name: unit name, dtype: object
0                                        "The Warm Up"
1    "Use the correct OS, Install the Chrome Browse...
2                                 "Use the correct OS"
3    "Use the correct OS, Install the Chrome Browse...
4    "Intro to command line, More Command Line Prac...
5    "Intro to command line, More Command Line Prac...
Name: right answers, dtype: object
0     1
1    17
2     1
3    17
4     7
5     7
Name: right count, dtype: int64
0                      "Intro to HTML"
1                                   ""
2                                   ""
3                                   ""
4    "Learn Command Line The Hard Way"
5                                   ""
Name: wrong answers, dtype: object
0    1
1    0
2    0
3    0
4    1
5    0
Name: wrong count, dtype: int64
0     2
1    17
2     1
3    17
4     8
5     7
Name: answered assessments, dtype: int64
0      1/3
1    17/17
2     1/17
3    17/17
4      7/8
5      7/8
Name: points/max_points, dtype: object
0     1
1    17
2     1
3    17
4     7
5     7
Name: points, dtype: int64
0     3
1    17
2    17
3    17
4     8
5     8
Name: total points, dtype: int64
0    295
1    232
2     44
3    450
4    139
5    368
Name: time spent, dtype: int64
0      NaN
1    100.0
2      NaN
3    100.0
4     87.0
5      NaN
Name: grade, dtype: float64
0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
5   NaN
Name: comment, dtype: float64
0     Sep 24th 2019 3:17pm
1    Sep 16th 2019 12:28pm
2     Sep 18th 2019 9:07pm
3     Sep 24th 2019 1:13am
4     Sep 17th 2019 3:55pm
5     Sep 24th 2019 1:22am
Name: created, dtype: object
0    False
1     True
2    False
3     True
4     True
5    False
Name: completed, dtype: bool
0                                        NaN
1    Wed Sep 18 2019 13:07:54 GMT+0000 (UTC)
2                                        NaN
3    Tue Sep 24 2019 01:21:48 GMT+0000 (UTC)
4    Wed Sep 18 2019 13:11:32 GMT+0000 (UTC)
5                                        NaN
Name: completed date, dtype: object
0    "The Warm Up:\nstdout='complete'\nstderr=''\nI...
1    "Use the correct OS:\nstdout=''complete''\nstd...
2    "Use the correct OS:\nstdout=''complete''\nstd...
3    "Use the correct OS:\nstdout=''complete''\nstd...
4    "Intro to command line:\nstdout=''complete''\n...
5    "Intro to command line:\nstdout=''complete''\n...
Name: answers, dtype: object
0     "resume:\n0\nThe Warm Up:\n1\nIntro to HTML:\n1"
1    "Use the correct OS:\n1\nInstall the Chrome Br...
2    "Use the correct OS:\n1\nInstall the Chrome Br...
3    "Use the correct OS:\n1\nInstall the Chrome Br...
4    "Intro to command line:\n1\nLearn Command Line...
5    "Intro to command line:\n1\nLearn Command Line...
Name: attempts, dtype: object

Create a Form in a Jupyter Notebook that sets Python Names

Jupyter Notebook: Javascript/Python Bi-directional Communication

Interact with this notebook on Binder here.

In [6]:
# define a Python variable

foo = None
print(foo)
None

Execute JavaScript in the cell by using %%javascript magic.

In [7]:
%%javascript

const kernel = IPython.notebook.kernel;
//The string passed to the execute function has to be valid Python.
kernel.execute('foo = "Hello, from the browser\'s JavaScript world."');

NOTE: If you run all the cells at once, the variable is not set.

This is likely due to the async nature of the JS runtime environment in the browser.

This demo works best by running one cell at a time.

In [8]:
# The Javascript above runs async. Wait for it to finish.
print(foo)
Hello, from the browser's JavaScript world.

The Python variable foo was set to "Hello, from the browser's JavaScript world." by the JavaScript environment.

Display a button by using %%html magic.

Note: After this notebook is parsed into a Nikola blog post, the button does not function.

Try out the executable version of this notebook here.

In [9]:
%%html
<script>
//Using let and const becomes problematic. Have to reload after changes.
var   kernel = IPython.notebook.kernel,
      //Convenient function to add listeners to elements
      on = (el, evt, fn, opts = {}) => {
        const delegatorFn = e => e.target.matches(opts.target) && fn.call(e.target, e);
        el.addEventListener(evt, opts.target ? delegatorFn : fn, opts.options || false);
        if (opts.target) return delegatorFn;
      },
      prefix = 'button click count: ',
      countElement = document.getElementById('count'),
      count = parseInt(countElement.textContent, 10) || 0,
      increment = () => {
          count += 1;
          countElement.textContent = count;
          //The string passed to execute must be valid Python
          kernel.execute(`foo = ${count}`);
          setContent();
      },
      inputElement = document.getElementById('message-input'),
      setMessage = () => {
          //Define a Python variable called "message".
          kernel.execute(`message = "${inputElement.value}"`);
      }
      setContent = () => countElement.textContent = `${prefix}${count}`;
on(document.getElementById('increment'), 'click', increment);
on(inputElement, 'blur', setMessage);
setContent()
</script>
<div class="input-group input-group-sm mb-3">
  <div class="input-group-prepend">
    <span class="input-group-text" id="inputGroup-sizing-sm">Short Message</span>
  </div>
  <input id="message-input" type="text" class="form-control" aria-label="Small" aria-describedby="inputGroup-sizing-sm">
</div>
<p id="count"></p>
<button id='increment' type="submit" class="btn btn-primary">
    Submit
</button>
Short Message

Observe that the Python variable was set inside of the browser's environment.

In [10]:
print(f"The button was clicked {foo} times.\n{message}")
The button was clicked 10 times.
Jupyter notebooks are useful.

Notes on Building Python Packages with a Cookiecutter Template

I have been using the tool Flit to build Python packages because it was simple.

Flit is a simple way to put Python packages and modules on PyPI.

I want to try something more complex.

These are some notes to myself about using a Cookiecutter template called cookiecutter-pylibrary.

Using pexpect with cookiecutter as in this example is fragile. Time to read the Cookiecutter documentation on suppressing command-line prompts.

It is a "TODO" to document --no-input

@click.option(
    u'--no-input', is_flag=True,
    help=u'Do not prompt for parameters and only use cookiecutter.json '
         u'file content',
)

A solution.

Use a cookiecutter.json file.

cookiecutter  ~/projects/cookiecutters/cookiecutter-pylibrary --no-input

Use Python code to create temp directory for output-dir to quickly create "hello world" templates with which to practice building Python libraries

The code is in progress.

Generate a Pyramid Project with a Python Script that Interacts with the Cookiecutter REPL.

Generate a Pyramid project using a Python script to interact with Cookiecutter via Pexpect.

We will create a Pyramid project in your home directory for Unix or at the root for Windows. It is assumed you know the path to where you installed cookiecutter. Issue the following commands and override the defaults in the prompts as follows.

I originally attempted to open a subprocess with subprocess.Popen and then communicate with the process. I learned from this stackoverflow answer that it gets complicated with a REPL situation.

In [3]:
import tempfile
from pathlib import Path
import os
import pickle

from IPython.display import display, HTML

BASE_DIR = Path(Path.home(), 'scratch')
PROJECT_NAME = 'pyramid_practice'
PREFIX = f'{PROJECT_NAME}_'
WORKING_DIR_NAME = tempfile.mkdtemp(prefix=PREFIX, dir=BASE_DIR)
WORKING_DIR = BASE_DIR.joinpath(WORKING_DIR_NAME)
assert WORKING_DIR.exists(), "No working directory exists."

# Save WORKING_DIR value in a pickle to use in another blog post.
Path('working_dir.pk').write_bytes(pickle.dumps(WORKING_DIR))
Out[3]:
98

Use Cookiecutter and Pexpect to scaffold the project

Update

Cookiecutter has a mechanism for no input.

The purpose of this exercise was to practice pexpect.

In [4]:
import pexpect

RETURN_KEY = 'return_key'
DEFAULT, TWO = '\n', '2'
inputs = dict(
    default=DEFAULT,
    project_name=PROJECT_NAME,
    repo_name=DEFAULT,
    template_lang=TWO, # Chameleon
    orm=TWO, # SQLAlchemy
)

os.chdir(WORKING_DIR)
command = 'cookiecutter gh:Pylons/pyramid-cookiecutter-starter --checkout 1.10-branch'
child = pexpect.spawnu(command)
EXPECTED = ']:'
print(f'working directory: {Path(os.curdir).absolute().parts[-1]}')
for value in inputs.values():
    child.expect(EXPECTED)
    if child.before:
        display(HTML((f'<h3>{child.before}{EXPECTED}<h3>')))
    child.sendline(value)
    display(HTML((f'<h4>sending "{value.strip() or RETURN_KEY}"<h4>')))
    
assert not child.wait(), "There was an error in the subprocess."
working directory: pyramid_practice_y2bz1v0h

You've downloaded /home/dmmmd/.cookiecutters/pyramid-cookiecutter-starter before. Is it okay to delete and re-download it? [yes]:

sending "return_key"

project_name [Pyramid Scaffold]:

sending "pyramid_practice"

repo_name [pyramid_scaffold]:

sending "return_key"

pyramid_practice Select template_language: 1 - jinja2 2 - chameleon 3 - mako Choose from 1, 2, 3 (1, 2, 3) [1]:

sending "2"

Select backend: 1 - none 2 - sqlalchemy 3 - zodb Choose from 1, 2, 3 (1, 2, 3) [1]:

sending "2"
In [5]:
for base, dirs, files in os.walk(WORKING_DIR):
    for dir_ in dirs:
        for file in files:
            print(Path(*Path(base, dir_, file).parts[-2:]))
pyramid_practice/pytest.ini
pyramid_practice/CHANGES.txt
pyramid_practice/.gitignore
pyramid_practice/README.txt
pyramid_practice/setup.py
pyramid_practice/MANIFEST.in
pyramid_practice/production.ini
pyramid_practice/development.ini
pyramid_practice/.coveragerc
static/routes.py
static/tests.py
static/__init__.py
templates/routes.py
templates/tests.py
templates/__init__.py
views/routes.py
views/tests.py
views/__init__.py