# Mirror of https://codeberg.org/privacy1st/pdf-replace
# Synced 2024-11-21 22:03:20 +01:00
# 33 lines, 830 B, Python
from pathlib import Path
|
|
|
|
import pdfrw
|
|
from pdfrw import PdfDict, PdfArray, PdfString
|
|
|
|
|
|
def main():
    """Script entry point: print every link contained in ``pdf.pdf``."""
    # The input file is looked up relative to the current working directory.
    print_links(Path("pdf.pdf"))
|
|
|
|
|
|
def print_links(pdf_file: Path):
    """
    Print all links found in the given PDF file.

    For every page a ``Page <index>`` header is printed (the index comes
    straight from ``enumerate`` and therefore starts at 0), followed by one
    tab-indented line per URI found in that page's annotations.

    Pages without an ``/Annots`` entry, annotations without an ``/A`` action
    and actions without a ``/URI`` entry are skipped instead of raising.

    :param pdf_file: Path of the PDF file to read.
    """
    pdf_reader = pdfrw.PdfReader(pdf_file)
    pages: list = pdf_reader.pages
    page: PdfDict
    for page_num, page in enumerate(pages):
        print(f"Page {page_num}")

        # Links are in /Annots. A page is not required to have any
        # annotations, in which case the lookup yields None.
        annots = page.get('/Annots')
        if annots is None:
            continue

        for annot in annots:
            # Not every annotation is a URI link: it may carry no action at
            # all, or an action of a different kind that has no /URI entry.
            action = annot.get('/A')
            if action is None:
                continue
            # The links are inside brackets, e.g. (https://example.com)
            uri = action.get('/URI')
            if uri is None:
                continue
            # But after decoding them, the brackets are gone
            uri_str: str = uri.decode()
            print(f'\t{uri_str}')
|
|
|
|
|
|
# Run the link printer only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|