This commit is contained in:
Daniel Langbein 2021-09-15 17:27:01 +02:00
commit fc193738a4
13 changed files with 208 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/pkg/
/src/

8
.idea/.gitignore generated vendored Normal file
View File

@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

1
.idea/.name generated Normal file
View File

@ -0,0 +1 @@
image-width-limit

11
.idea/image-width-limit.iml generated Normal file
View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<excludeFolder url="file://$MODULE_DIR$/pkg" />
<excludeFolder url="file://$MODULE_DIR$/src" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@ -0,0 +1,14 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyCompatibilityInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ourVersions">
<value>
<list size="1">
<item index="0" class="java.lang.String" itemvalue="3.10" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>

View File

@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

4
.idea/misc.xml generated Normal file
View File

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (venv39-2)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file
View File

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/../image-width-limit/.idea/image-width-limit.iml" filepath="$PROJECT_DIR$/../image-width-limit/.idea/image-width-limit.iml" />
</modules>
</component>
</project>

6
.idea/vcs.xml generated Normal file
View File

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

17
PKGBUILD Normal file
View File

@ -0,0 +1,17 @@
# Maintainer: Daniel Langbein <daniel@systemli.org>
_pkgname=image-width-limit
_reponame=arch
pkgname="de-p1st-$_pkgname"
pkgver=1.0.0
pkgrel=0
pkgdesc="Script to limit HTML image width"
arch=('any')
license=('MIT')
depends=('python3' 'python-beautifulsoup4' 'python-html5lib')
source=("${_pkgname}".py)
sha256sums=('88b20a75bdfd7f0f0ff6054ff8a6e77783e21488ef3d8ecc5cfadb7fcdf37d6e')
package() {
  # Install the script as an executable in /usr/bin, named after the package
  # (-D creates any missing parent directories below $pkgdir).
  install -D -m 0755 "${_pkgname}.py" "${pkgdir}/usr/bin/${pkgname}"
}

46
README.md Normal file
View File

@ -0,0 +1,46 @@
# (HTML) Image width limit
From [image-width-limit.py](image-width-limit.py), function `limit_image_width`:
> When converting HTML5 to other formats, e.g. PDF, it
> may happen that too wide images get cropped off.
>
> If there are HTML5 image tags which contain only
> the 'src' and 'alt' attributes, then this method adds
> a style attribute limiting the image width to each of
> those image tags.
## Global Installation - Arch Linux
To install the script as `de-p1st-image-width-limit` run the following:
```shell
makepkg -fCcsri
```
## Manual Installation
Install the python3 dependencies, e.g. with pip:
* `beautifulsoup4` (`pacman -S python-beautifulsoup4`)
* `html5lib` (`pacman -S python-html5lib`)
And make the python script executable:
```shell
chmod +x ./image-width-limit.py
```
## Example
Global installation:
```shell
de-p1st-image-width-limit example.html modified-example.html
```
Local installation:
```shell
python3 ./image-width-limit.py example.html modified-example.html
```

3
example.html Normal file
View File

@ -0,0 +1,3 @@
<h1>Responsive Images</h1>
<h2>Example</h2>
<p><img src="image.png" alt="" /></p>

82
image-width-limit.py Normal file
View File

@ -0,0 +1,82 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
from pathlib import Path
from sys import argv
from typing import Dict
from bs4 import BeautifulSoup, ResultSet
def main():
    """
    Command-line entry point.

    Usage: <input_file> [<output_file>]

    Reads the HTML from ``input_file``, passes it through
    ``limit_image_width`` and writes the result to ``output_file``.
    If no output file is given, the input file is overwritten in place.

    Python3 dependencies:

    -> beautifulsoup4 (pacman -S python-beautifulsoup4)
    -> html5lib (pacman -S python-html5lib)

    :raises SystemExit: if the number of command-line arguments is wrong
    """
    if len(argv) not in (2, 3):
        # A bare string cannot be raised in Python 3 (it fails with an
        # unrelated TypeError). SystemExit prints the message to stderr
        # and terminates with a non-zero exit status.
        raise SystemExit("usage: <input_file> [<output_file>]")

    input_file = Path(argv[1])
    # Without an explicit output file the input file is modified in place.
    output_file = Path(argv[2]) if len(argv) == 3 else input_file

    # Use an explicit encoding instead of the locale default so that the
    # result does not depend on the environment the script runs in.
    html_str = input_file.read_text(encoding='utf-8')

    modified_html_str = limit_image_width(html_str)

    # Write to new file -> overwrite if already existent!
    output_file.write_text(modified_html_str, encoding='utf-8')
def limit_image_width(html_str) -> str:
    """
    When converting HTML5 to other formats, e.g. PDF, it
    may happen that too wide images get cropped off.

    If there are HTML5 image tags which contain only
    the 'src' and 'alt' attributes, then this method adds
    a style attribute limiting the image width to each of
    those image tags.

    An image tag without an 'alt' attribute gets one whose value is
    copied from its 'src' attribute.

    :param html_str: HTML5
    :return: modified HTML5 with max-width attribute added to image tags without size attributes
    :raises ValueError: if an image tag has no 'src' attribute
    """
    soup = BeautifulSoup(markup=html_str, features='html5lib')

    for img_tag in soup.find_all(name='img', recursive=True):
        attrs: Dict = img_tag.attrs

        if 'src' not in attrs:
            # Raise a real exception: raising a plain string is itself
            # a TypeError in Python 3 and would hide this message.
            raise ValueError('src attr missing!')
        if 'alt' not in attrs:
            attrs['alt'] = attrs['src']

        # Any other attrs apart from 'src' and 'alt' may specify the image size and position.
        # If such attrs do already exist, we continue with the next image.
        # Otherwise: We add an attribute to fit the image to the screen/page.
        if len(attrs) > 2:
            continue

        # Prevent too wide images.
        # Sources:
        # -> https://www.smashingmagazine.com/2020/03/setting-height-width-images-important-again/
        # -> https://www.w3schools.com/tags/att_style.asp
        attrs['style'] = 'max-width:100%;height:auto;'

    return str(soup)
# Run the command-line interface only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()