commit fc193738a41339c61b08617a008aab8de9ca20a0 Author: Daniel Langbein Date: Wed Sep 15 17:27:01 2021 +0200 init diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4fdb930 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/pkg/ +/src/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..73f69e0 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# Editor-based HTTP Client requests +/httpRequests/ diff --git a/.idea/.name b/.idea/.name new file mode 100644 index 0000000..07efa44 --- /dev/null +++ b/.idea/.name @@ -0,0 +1 @@ +image-width-limit \ No newline at end of file diff --git a/.idea/image-width-limit.iml b/.idea/image-width-limit.iml new file mode 100644 index 0000000..849a2f3 --- /dev/null +++ b/.idea/image-width-limit.iml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml new file mode 100644 index 0000000..7aeb7a8 --- /dev/null +++ b/.idea/inspectionProfiles/Project_Default.xml @@ -0,0 +1,14 @@ + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..14fc843 --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..60a10bd --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- 
/dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/PKGBUILD b/PKGBUILD new file mode 100644 index 0000000..2cab21b --- /dev/null +++ b/PKGBUILD @@ -0,0 +1,17 @@ +# Maintainer: Daniel Langbein +_pkgname=image-width-limit +_reponame=arch + +pkgname="de-p1st-$_pkgname" +pkgver=1.0.0 +pkgrel=0 +pkgdesc="Script to limit HTML image width" +arch=('any') +license=('MIT') +depends=('python3' 'python-beautifulsoup4' 'python-html5lib') +source=("${_pkgname}".py) +sha256sums=('88b20a75bdfd7f0f0ff6054ff8a6e77783e21488ef3d8ecc5cfadb7fcdf37d6e') + +package() { + install -Dm0755 "${_pkgname}".py "$pkgdir"/usr/bin/"${pkgname}" +} diff --git a/README.md b/README.md new file mode 100644 index 0000000..9fb85b3 --- /dev/null +++ b/README.md @@ -0,0 +1,46 @@ +# (HTML) Image width limit + +From [image-width-limit.py](image-width-limit.py), function `limit_image_width`: + +> When converting HTML5 to other formats, e.g. PDF, it +> may happen that too wide images get cropped off. +> +> If there are HTML5 image tags which do only contain +> the 'src' and 'alt' attribute, then this method adds +> a style attribute limiting the image width to each of +> those image tags. + +## Global Installation - Arch Linux + +To install the script as `de-p1st-image-width-limit` run the following: + +```shell +makepkg -fCcsri +``` + +## Manual Installation + +Install the python3 dependencies, e.g. with pip or pacman: + +* `beautifulsoup4` (`pacman -S python-beautifulsoup4`) +* `html5lib` (`pacman -S python-html5lib`) + +And make the python script executable: + +```shell +chmod +x ./image-width-limit.py +``` + +## Example + +Global installation: + +```shell +de-p1st-image-width-limit example.html modified-example.html +``` + +Local installation: + +```shell +python3 ./image-width-limit.py example.html modified-example.html +``` diff --git a/example.html b/example.html new file mode 100644 index 0000000..ea1218b --- /dev/null +++ b/example.html @@ -0,0 +1,3 @@ +

Responsive Images

+

Example

+

# diff --git a/image-width-limit.py b/image-width-limit.py
# new file mode 100644
# index 0000000..529393d
# --- /dev/null
# +++ b/image-width-limit.py
# @@ -0,0 +1,82 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-

from pathlib import Path
from sys import argv
from typing import Dict

from bs4 import BeautifulSoup, ResultSet


def main():
    """
    Command-line entry point: limit the image width inside an HTML file.

    Reads the HTML file named by the first command-line argument, passes its
    content through `limit_image_width` and writes the result to the file
    named by the optional second argument. Without a second argument the
    input file is overwritten in place.

    Python3 dependencies:
    -> beautifulsoup4 (pacman -S python-beautifulsoup4)
    -> html5lib (pacman -S python-html5lib)

    :raises SystemExit: if the number of command-line arguments is wrong
    """

    if len(argv) not in (2, 3):
        # Raising a plain string is a TypeError in Python 3; SystemExit prints
        # the message to stderr and terminates with a non-zero exit status.
        prog = argv[0] if argv else 'image-width-limit'
        raise SystemExit(f'usage: {prog} <input-file> [<output-file>]')

    input_file = Path(argv[1])
    # Only one path given -> modify the input file in place.
    output_file = Path(argv[2]) if len(argv) == 3 else input_file

    # Read input file. The encoding is given explicitly so the result does
    # not depend on the locale of the machine running the script.
    html_str = input_file.read_text(encoding='utf-8')

    modified_html_str = limit_image_width(html_str)

    # Write to new file -> overwrite if already existent!
    output_file.write_text(modified_html_str, encoding='utf-8')


def limit_image_width(html_str) -> str:
    """
    When converting HTML5 to other formats, e.g. PDF, it
    may happen that too wide images get cropped off.

    If there are HTML5 image tags which do only contain
    the 'src' and 'alt' attribute, then this method adds
    a style attribute limiting the image width to each of
    those image tags. An image tag without 'alt' attribute
    gets its 'src' value as 'alt' text.

    :param html_str: HTML5
    :return: modified HTML5 with max-width attribute added to image tags without size attributes
    :raises ValueError: if an image tag has no 'src' attribute
    """

    soup = BeautifulSoup(markup=html_str, features='html5lib')
    img_tags: ResultSet = soup.find_all(name='img', recursive=True)

    for img_tag in img_tags:
        attrs: Dict = img_tag.attrs
        if 'src' not in attrs:
            raise ValueError('src attr missing!')
        # Fall back to the image source as alternative text.
        attrs.setdefault('alt', attrs['src'])

        # Any other attrs apart from 'src' and 'alt' may specify the image size and position.
        # If such attrs do already exist, we continue with the next image.
        # Otherwise: We add an attribute to fit the image to the screen/page.
        if len(attrs) > 2:
            continue

        # Prevent too wide images.
        # Sources:
        # -> https://www.smashingmagazine.com/2020/03/setting-height-width-images-important-again/
        # -> https://www.w3schools.com/tags/att_style.asp
        attrs['style'] = 'max-width:100%;height:auto;'

    return str(soup)


if __name__ == '__main__':
    main()