initial commit

This commit is contained in:
askiiart 2025-01-04 14:37:35 -06:00
commit 3e3816b72d
Signed by untrusted user who does not match committer: askiiart
GPG key ID: 6A32977DAF31746A
4 changed files with 82 additions and 0 deletions

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/.venv

27
README.md Normal file
View file

@ -0,0 +1,27 @@
# One Webpage, Under God
With "liberty" and JS for none.
---
It's a thing to make a webpage into a single file with no external dependencies, with, of course, "liberty" (no options) and JS for none (it doesn't do JS).
Currently it supports hardcoding:
- favicons
- images
## Usage
```sh
python3 owug.py 'https://example.com' 'out.html'
```
## License
OWUG is licensed under `GPL-3.0-only`.
## TODO
- Add CSS support
- Add support for making relative links into absolute ones (`./a.html` -> `https://example.com/a.html`)

51
owug.py Normal file
View file

@ -0,0 +1,51 @@
import base64
import sys
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup
def absolute_url(url, base_domain):
    '''
    Makes any URL into an absolute URL (i.e. not a relative link)

    `url` is a reference as found in an HTML attribute and `base_domain` is
    the scheme and host of the page it came from (e.g. `https://example.com`),
    with or without a trailing slash.

    References that already carry a scheme (`https:`, `data:`, ...) are kept,
    protocol-relative references (`//cdn.example.com/a.png`) take the scheme
    of `base_domain`, and everything else is resolved against the root of
    `base_domain`.
    '''
    # Anchor the base at exactly one trailing slash so that path-relative
    # references ('a.png') resolve against the site root and a base that
    # already ends in '/' does not produce a double slash.
    base = base_domain.rstrip('/') + '/'
    # Attribute values may carry stray surrounding whitespace; it is not part
    # of the reference.
    return urljoin(base, url.strip())
# Seconds to wait on any single HTTP request before giving up, so that an
# unresponsive server cannot hang the script forever.
_TIMEOUT = 30


def _fetch_data_uri(url):
    '''
    Downloads `url` and returns its content as a base64-encoded `data:` URI.

    A single GET supplies both the body and the Content-Type header.
    Raises `requests.RequestException` on network errors and on HTTP error
    statuses, so an error page is never embedded in place of the resource.
    '''
    response = requests.get(url, timeout=_TIMEOUT)
    response.raise_for_status()
    # Fall back to a generic binary type when the server sends no Content-Type
    mime_type = response.headers.get('Content-Type', 'application/octet-stream')
    as_base64 = base64.b64encode(response.content).decode()
    return f'data:{mime_type};base64,{as_base64}'


def _hardcode(tags, attr, base_domain):
    '''
    Replaces the URL held in `attr` of every tag in `tags` with a `data:` URI.

    Tags that lack the attribute, or whose attribute already is a `data:`
    URI, are skipped. A resource that cannot be downloaded is reported on
    stderr and its tag is left unchanged, so the rest of the page is still
    processed.
    '''
    for tag in tags:
        ref = tag.get(attr)
        if not ref or ref.strip().lower().startswith('data:'):
            continue
        url = absolute_url(ref, base_domain)
        try:
            tag.attrs[attr] = _fetch_data_uri(url)
        except requests.RequestException as err:
            print(f'warning: could not embed {url}: {err}', file=sys.stderr)


if len(sys.argv) < 3:
    sys.exit(f'usage: {sys.argv[0]} URL OUTPUT_FILE')

# the link without the path and stuff
# e.g. https://google.com/aaaaaa -> https://google.com
parts = urlsplit(sys.argv[1])
domain_thing = f'{parts.scheme}://{parts.netloc}'

# Pass the raw bytes to the parser instead of decoding them as UTF-8 here,
# so pages served in another encoding don't raise UnicodeDecodeError.
html = requests.get(sys.argv[1], timeout=_TIMEOUT).content
soup = BeautifulSoup(html, 'html5lib')

# hardcode favicon
_hardcode(soup.find_all('link', rel='icon'), 'href', domain_thing)

# hardcode images
_hardcode(soup.find_all('img'), 'src', domain_thing)

# Write UTF-8 explicitly rather than the platform's default encoding, which
# may be unable to represent every character on the page.
with open(sys.argv[2], 'wt', encoding='utf-8') as f:
    f.write(str(soup))

3
requirements.txt Normal file
View file

@ -0,0 +1,3 @@
bs4
requests
html5lib