Edit a PDF document with the Python reportlab library

Edit downloaded resume from Stack Overflow using Python

The downloaded resume from Stack Overflow does not contain my full name.

This Python code completes my name by stamping my surname onto the first page, and it removes the last page.

The code below does this:

In [63]:
import io

from subprocess import check_call
import shlex
import urllib.request
from pathlib import Path

from PyPDF2 import PdfFileWriter, PdfFileReader
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from IPython.display import display, IFrame, HTML
from chamelboots import ChameleonTemplate as CT
from chamelboots import TalStatement as TS
In [64]:
# Location of the editable resume PDF that the following cells download.
pdf_url = "https://minio.apps.selfip.com/mymedia/resumes/Don-Morehouse-editable.pdf"
In [65]:
# Download the PDF. urlretrieve returns (local_filename, headers); with no
# target filename given, only the first element (the local path) is needed.
document = Path(urllib.request.urlretrieve(pdf_url)[0])
document
Out[65]:
PosixPath('/tmp/tmp12j20ysy')
In [66]:
# In-memory buffer that receives the generated overlay PDF; a later cell
# rewinds it and reads it back with PdfFileReader.
packet = io.BytesIO()
# Create a new PDF with ReportLab that contains only the text to stamp.
can = canvas.Canvas(packet, pagesize=letter)
textobject = can.beginText()
# Configure position, font, and colour, then emit the text line.
# NOTE(review): (125, 754) is presumably chosen so the surname lines up with
# the name already printed on the resume — confirm visually.
textobject.setTextOrigin(125, 754)
textobject.setFont("Helvetica-Bold", 24)
textobject.setFillColor("#005992")
textobject.textLine(text="Morehouse")
can.drawText(textobject)
# Finish the canvas so the PDF is written out to `packet`.
can.save()
In [67]:
# Name of the edited PDF. Bound as a plain top-level constant (rather than
# inside the `with` header) because later cells reuse it for the upload
# command and the link.
DESTINATION = "Don-Morehouse-latest.pdf"

# Rewind the BytesIO buffer so the overlay PDF is read from its first byte.
packet.seek(0)
new_pdf = PdfFileReader(packet)
# Read the downloaded PDF. The output is written inside this `with` block so
# the source file is still open while its pages are serialized.
with open(document, "rb") as fh:
    existing_pdf = PdfFileReader(fh)
    output = PdfFileWriter()
    # Stamp the overlay (the new single-page PDF) onto the first page.
    page = existing_pdf.getPage(0)
    page.mergePage(new_pdf.getPage(0))
    output.addPage(page)
    # Copy the remaining pages, stopping one short of the end so that the
    # final page of the source document is dropped.
    for page_number in range(1, existing_pdf.numPages - 1):
        output.addPage(existing_pdf.getPage(page_number))
    # Write the result to a real file; the `with` statement closes the
    # stream, so no explicit close() call is needed.
    with open(DESTINATION, "wb") as outputStream:
        output.write(outputStream)
In [68]:
# Upload the edited PDF with the `mc` command-line client so it can be viewed.
BUCKET = "resumes"
upload_command = f"mc cp {DESTINATION} dokkuminio/mymedia/{BUCKET}/{DESTINATION}"
check_call(shlex.split(upload_command))

# Names shared by the TalStatement definitions and the render() keyword
# arguments, bound up front instead of inside the template expression.
CONTENT = "content"
ATTRIBUTES = "attributes"
ATTR_CONTENT = (
    TS(CONTENT, f"structure {CONTENT}"),
    TS(ATTRIBUTES, ATTRIBUTES),
)

# Construct the "a" template first, then render the "h2" heading that is
# passed in as its content, and finally render the link itself.
link_template = CT("a", ATTR_CONTENT)
heading_markup = CT("h2", (TS(CONTENT, CONTENT),)).render(
    content="view the pdf: 'Morehouse' is added at the top"
)
link_markup = link_template.render(
    content=heading_markup,
    attributes={
        "href": f"https://minio.apps.selfip.com/mymedia/{BUCKET}/{DESTINATION}",
        "target": "_blank",
    },
)

# Show the rendered markup in the notebook output.
display(HTML(link_markup))