Answer Stack Overflow Question: How can I create a dictionary from two different row values in excel in python?

Answer Stack Overflow Question: How can I create a dictionary from two different row values in excel in python?

Create some sample data into an Excel spreadsheet.

In [29]:
from pathlib import Path
import tempfile
from subprocess import check_output, STDOUT
import shlex

from faker import Faker
import openpyxl
In [30]:
fake = Faker()
In [31]:
wb = openpyxl.Workbook()
ws = wb.active
In [32]:
def get_unique_keys(length, fake_attr, f):
    items = set()
    while len(items) < length:
        items.update(set((f(getattr(fake, fake_attr)()),)))
    return list(items)
In [33]:
LENGTH = 10
for row in (
    get_unique_keys(*args)
    for _ in range(3)
    for args in (
        (LENGTH, "uuid4", lambda x: x[:8]),
        (LENGTH, "catch_phrase", lambda x: x.split()[-1]),
    )
):
    ws.append(row)
In [34]:
path = Path(tempfile.mkdtemp()).joinpath('fake_xl.xlxs')
path
Out[34]:
PosixPath('/tmp/tmpgx8zlnfu/fake_xl.xlxs')
In [35]:
wb.save(path.as_posix())
In [36]:
check_output(shlex.split(f"mc cp {path} dokkuminio/mymedia/xlxs/"))
Out[36]:
b'`/tmp/tmpgx8zlnfu/fake_xl.xlxs` -> `dokkuminio/mymedia/xlxs/fake_xl.xlxs`\nTotal: 5.05 KB, Transferred: 5.05 KB, Speed: 1.07 MB/s\n'

A solution to the question.

In [53]:
import urllib.request
import xlrd
import itertools as it

url = "https://minio.apps.selfip.com/mymedia/xlxs/fake_xl.xlxs"

filepath, response = urllib.request.urlretrieve(url, "/tmp/test.xlxs") 

wb = xlrd.open_workbook(filepath)
ws = wb.sheet_by_index(0)

odd_rows, even_rows = it.tee(ws.get_rows()) # Get 2 iterables using itertools.tee
row_pairs = ( # generator expression to "chunk the rows into groups of 2"
    [[cell.value for cell in row] for row in rows] # 2D list of values
    for rows in zip(
        it.islice(odd_rows, 0, ws.nrows, 2), # Use itertools.islice
        it.islice(even_rows, 1, ws.nrows, 2),
    )
)
print([dict(zip(*row_pair)) for row_pair in row_pairs])

# Another way to chunk the rows into pairs is like this:

print([
    dict(zip(*[[cell.value for cell in pair] for pair in pairs]))
    for pairs in it.zip_longest(*it.repeat(iter(ws.get_rows()), 2))
])
[{'2a5de626': 'algorithm', '86ce99a2': 'implementation', 'e6b481ba': 'adapter', 'bc85c996': 'capability', '4edfb828': 'array', '05d79ce2': 'definition', 'b9b5ae33': 'knowledgebase', 'f0da7366': 'complexity', '39a48259': 'methodology', '1ee95d9e': 'strategy'}, {'01bc389d': 'neural-net', 'd5d16b0c': 'monitoring', 'd9fb3a8d': 'installation', '8c7a049f': 'moratorium', 'f3d9aa0e': 'help-desk', 'd0e8d371': 'paradigm', '9e33f679': 'complexity', '6354affc': 'core', '606c4eb6': 'groupware', '97741196': 'strategy'}, {'76ae32df': 'algorithm', '942654da': 'task-force', '462fa31b': 'ability', '584df007': 'adapter', 'f6293960': 'attitude', 'afd8fa00': 'knowledgebase', '4c5f2c49': 'alliance', '6d76c690': 'collaboration', '3018a22b': 'solution', '034f1bb2': 'access'}]
[{'2a5de626': 'algorithm', '86ce99a2': 'implementation', 'e6b481ba': 'adapter', 'bc85c996': 'capability', '4edfb828': 'array', '05d79ce2': 'definition', 'b9b5ae33': 'knowledgebase', 'f0da7366': 'complexity', '39a48259': 'methodology', '1ee95d9e': 'strategy'}, {'01bc389d': 'neural-net', 'd5d16b0c': 'monitoring', 'd9fb3a8d': 'installation', '8c7a049f': 'moratorium', 'f3d9aa0e': 'help-desk', 'd0e8d371': 'paradigm', '9e33f679': 'complexity', '6354affc': 'core', '606c4eb6': 'groupware', '97741196': 'strategy'}, {'76ae32df': 'algorithm', '942654da': 'task-force', '462fa31b': 'ability', '584df007': 'adapter', 'f6293960': 'attitude', 'afd8fa00': 'knowledgebase', '4c5f2c49': 'alliance', '6d76c690': 'collaboration', '3018a22b': 'solution', '034f1bb2': 'access'}]