# pdf-replace/pdf_replace/print_links.py

from pathlib import Path

import pdfrw
from pdfrw import PdfDict, PdfArray, PdfString


def main():
    """
    Script entry point: print the links of ``pdf.pdf``.

    The path is relative, so it is resolved against the current working
    directory.
    """
    print_links(Path("pdf.pdf"))


def print_links(pdf_file: Path):
    """
    Print all URI links found in the given PDF file.

    For every page a ``Page <index>`` header is printed (the index is
    zero-based), followed by one tab-indented line per annotation that
    carries a ``/URI`` action.

    Pages without an ``/Annots`` entry and annotations that are not URI
    links (no ``/A`` action, or an action without ``/URI``) are skipped
    instead of raising.

    :param pdf_file: Path of the PDF file to read.
    """
    pdf_reader = pdfrw.PdfReader(pdf_file)
    pages: list = pdf_reader.pages
    page: PdfDict
    for page_num, page in enumerate(pages):
        print(f"Page {page_num}")
        # Links are in /Annots. The entry is optional, so fall back to an
        # empty list when the page has no annotations at all.
        annots: PdfArray = page.get('/Annots') or []
        for annot in annots:
            # Not every annotation has an action dictionary (/A), and not
            # every action is a URI action; only those are links to print.
            action = annot.get('/A')
            if action is None:
                continue
            uri: PdfString = action.get('/URI')
            if uri is None:
                continue
            # The links are inside brackets, e.g. (https://example.com),
            # but after decoding them the brackets are gone.
            uri_str: str = uri.decode()
            print(f'\t{uri_str}')


# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()