mirror of
https://codeberg.org/privacy1st/image-width-limit
synced 2024-12-03 22:15:02 +01:00
90 lines
2.5 KiB
Python
90 lines
2.5 KiB
Python
#!/usr/bin/python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
from pathlib import Path
|
|
from sys import argv
|
|
from typing import Dict
|
|
|
|
from bs4 import BeautifulSoup, ResultSet
|
|
|
|
|
|
def main():
    """
    Command line entry point: limit the image sizes of one HTML file.

    Usage: <input_file> [<output_file>]

    Reads the HTML from <input_file>, passes it through
    limit_image_width() and writes the result to <output_file>. If no
    output file is given, the input file is overwritten in place.

    Python3 dependencies:
      -> beautifulsoup4 (pacman -S python-beautifulsoup4)
      -> html5lib (pacman -S python-html5lib)

    :raises SystemExit: with a usage message if the number of command
        line arguments is wrong
    """
    # argv[0] is the script name, so 1 or 2 real arguments are accepted.
    if len(argv) < 2 or len(argv) > 3:
        # Raising a plain string is a TypeError in Python 3; SystemExit
        # prints the message to stderr and exits with a non-zero status.
        raise SystemExit("usage: <input_file> [<output_file>]")

    input_file = Path(argv[1])
    # Without an explicit output file the input file is modified in place.
    output_file = Path(argv[2]) if len(argv) == 3 else input_file

    # Read input file
    html_str = input_file.read_text()

    modified_html_str = limit_image_width(html_str)

    # Write to new file -> overwrite if already existent!
    output_file.write_text(modified_html_str)
|
|
|
|
|
|
def limit_image_width(html_str) -> str:
    """
    Add a size-limiting style attribute to plain HTML5 image tags.

    When converting HTML5 to other formats, e.g. PDF, it may happen
    that images which are too wide get cropped off, and that images
    which are too tall get split up over multiple pages.

    Every img tag that carries no attributes other than 'src' and
    'alt' therefore receives the following style attribute:

        max-width:100%;height:25em;

    An img tag without an 'alt' attribute gets one, set to the value of
    its 'src' attribute. Apart from that, img tags which already have
    further attributes are left unchanged.

    :param html_str: source HTML5 string
    :returns: the document as serialised by BeautifulSoup, with the style
        attribute added to all img tags without size attributes
    :raises ValueError: if an img tag has no 'src' attribute
    """
    soup = BeautifulSoup(markup=html_str, features='html5lib')

    for img_tag in soup.find_all(name='img', recursive=True):
        attrs: Dict = img_tag.attrs
        if 'src' not in attrs:
            # A plain string cannot be raised in Python 3 (TypeError),
            # so signal the malformed tag with a real exception type.
            raise ValueError('src attr missing!')
        if 'alt' not in attrs:
            attrs['alt'] = attrs['src']

        # Any other attrs apart from 'src' and 'alt' may specify the image
        # size and position. Both of those are guaranteed to be present at
        # this point, so more than two attrs means such an attr exists and
        # we continue with the next image.
        if len(attrs) > 2:
            continue

        # Otherwise: prevent too wide (and too tall) images.
        # Sources:
        # -> https://www.smashingmagazine.com/2020/03/setting-height-width-images-important-again/
        # -> https://www.w3schools.com/tags/att_style.asp
        attrs['style'] = 'max-width:100%;height:25em;'

    return str(soup)
|
|
|
|
|
|
# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|