pdf-replace/pdf_replace/print_links.py

33 lines
830 B
Python
Raw Normal View History

2024-01-23 14:15:27 +01:00
from pathlib import Path
import pdfrw
from pdfrw import PdfDict, PdfArray, PdfString
def main():
    """Script entry point: list the links of ``pdf.pdf`` in the working directory."""
    # The input file name is fixed; it is resolved relative to the current
    # working directory when the file is opened.
    print_links(Path("pdf.pdf"))
def print_links(pdf_file: Path):
    """
    Print all URI links found in the given PDF file.

    For every page a ``Page <n>`` header is printed (``n`` is the 0-based
    page index), followed by one tab-indented line per link URI.

    Pages without annotations, annotations without an ``/A`` action
    (e.g. comments or highlights) and actions without a ``/URI`` entry
    (e.g. internal jumps) are skipped instead of raising an error.

    :param pdf_file: Path of the PDF file to read.
    """
    pdf_reader = pdfrw.PdfReader(pdf_file)
    pages: list = pdf_reader.pages
    page: PdfDict
    for page_num, page in enumerate(pages):
        print(f"Page {page_num}")
        # Links are in /Annots. pdfrw yields None for a missing key, so a
        # page without annotations is treated as having an empty list.
        annots: PdfArray = page['/Annots'] or []
        for annot in annots:
            # Only annotations that carry an action can be URI links.
            action = annot['/A']
            if action is None:
                continue
            # Actions other than URI actions have no /URI entry.
            uri: PdfString = action['/URI']
            if uri is None:
                continue
            # The raw value is wrapped in parentheses, e.g.
            # (https://example.com); decoding strips them.
            uri_str: str = uri.decode()
            print(f'\t{uri_str}')
# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()