from pathlib import Path

import pdfrw
from pdfrw import PdfDict, PdfArray, PdfString


def main():
    """Print the links found in ``pdf.pdf`` (path relative to the working directory)."""
    pdf_file = Path("pdf.pdf")
    print_links(pdf_file)


def print_links(pdf_file: Path):
    """
    Print all links found in the given PDF file.

    For every page a ``Page <n>`` header is printed (``n`` is the 0-based
    index of the page in the reader's page list), followed by one
    tab-indented line per URI link annotation found on that page.

    Pages without annotations, and annotations that carry no URI action,
    are skipped rather than raising.

    :param pdf_file: path of the PDF file to read.
    """
    pdf_reader = pdfrw.PdfReader(pdf_file)
    pages: list = pdf_reader.pages

    page: PdfDict
    for page_num, page in enumerate(pages):
        print(f"Page {page_num}")

        # Links are in /Annots. pdfrw's PdfDict yields None (instead of
        # raising KeyError) for a missing key, so a page without any
        # annotations has to be skipped explicitly.
        annots = page['/Annots']
        if not annots:
            continue

        for annot in annots:
            # Only annotations with an action dictionary (/A) holding a /URI
            # entry are external links; other annotations (or links that use
            # a different action) have nothing to print.
            action = annot['/A']
            if action is None:
                continue
            uri = action['/URI']
            if uri is None:
                continue

            # The raw PDF string is wrapped in brackets,
            # e.g. (https://example.com); decoding strips them.
            uri_str: str = uri.decode()
            print(f'\t{uri_str}')


if __name__ == '__main__':
    main()