image-width-limit/image-width-limit.py
2021-09-15 17:27:01 +02:00

83 lines
2.3 KiB
Python

#!/usr/bin/python3
# -*- coding: utf-8 -*-
from pathlib import Path
from sys import argv
from typing import Dict
from bs4 import BeautifulSoup, ResultSet
def main():
    """
    Command line entry point: limit the image width in an HTML5 file.

    Usage: <input_file> [<output_file>]

    Reads the HTML5 document from <input_file>, passes it through
    limit_image_width() and writes the result to <output_file>.
    If no output file is given, the input file is overwritten.

    Python3 dependencies:
    -> beautifulsoup4 (pacman -S python-beautifulsoup4)
    -> html5lib (pacman -S python-html5lib)

    :raises SystemExit: if the number of command line arguments is wrong
    """
    if not 2 <= len(argv) <= 3:
        # Raising a plain string is a TypeError in Python 3, so the usage
        # hint would never be shown. SystemExit prints the message to stderr
        # and terminates the script with a non-zero exit status.
        raise SystemExit("usage: <input_file> [<output_file>]")

    input_file = Path(argv[1])
    # Without an explicit output file the input file is modified in place.
    output_file = Path(argv[2]) if len(argv) == 3 else input_file

    # Read the complete input before the output is opened for writing,
    # because both paths may refer to the same file.
    with open(input_file) as f:
        html_str = f.read()

    modified_html_str = limit_image_width(html_str)

    # Write to new file -> overwrite if already existent!
    with open(output_file, mode='w') as f:
        f.write(modified_html_str)
def limit_image_width(html_str) -> str:
    """
    Add a width-limiting style attribute to plain HTML5 image tags.

    When converting HTML5 to other formats, e.g. PDF, it
    may happen that too wide images get cropped off.
    If there are HTML5 image tags which do only contain
    the 'src' and 'alt' attribute, then this method adds
    a style attribute limiting the image width to each of
    those image tags. A missing 'alt' attribute is filled with
    the value of 'src' before that check is made.

    :param html_str: HTML5
    :return: modified HTML5 with max-width attribute added to image tags without size attributes
    :raises ValueError: if an image tag has no 'src' attribute
    """
    html_parser = 'html5lib'
    soup = BeautifulSoup(markup=html_str, features=html_parser)

    img_tags: ResultSet = soup.find_all(name='img', recursive=True)
    for img_tag in img_tags:
        attrs: Dict = img_tag.attrs

        if 'src' not in attrs:
            # A plain string cannot be raised in Python 3 (it results in a
            # TypeError), so signal the problem with a real exception.
            raise ValueError('src attr missing!')

        # Fall back to the image source as alternative text.
        attrs.setdefault('alt', attrs['src'])

        # Any other attrs apart from 'src' and 'alt' may specify the image size and position.
        # If such attrs do already exist, we continue with the next image.
        # Otherwise: We add an attribute to fit the image to the screen/page.
        if len(attrs) > 2:
            continue

        # Prevent too wide images.
        # Sources:
        # -> https://www.smashingmagazine.com/2020/03/setting-height-width-images-important-again/
        # -> https://www.w3schools.com/tags/att_style.asp
        attrs['style'] = 'max-width:100%;height:auto;'

    return str(soup)
# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()