From 3e3816b72d08eaae0b26c036913116ad592217da Mon Sep 17 00:00:00 2001
From: askiiart
Date: Sat, 4 Jan 2025 14:37:35 -0600
Subject: [PATCH] initial commit

---
Note: the blob hashes on the `index` lines below predate the review edits to
README.md and owug.py; they are informational only for newly created files.

 .gitignore       |  1 +
 README.md        | 27 ++++++++++++++++
 owug.py          | 79 ++++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  3 ++
 4 files changed, 110 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 owug.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fb9df04
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+/.venv
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..16dde87
--- /dev/null
+++ b/README.md
@@ -0,0 +1,27 @@
+# One Webpage, Under God
+
+With "liberty" and JS for none.
+
+---
+
+It's a thing to make a webpage into a single file with no external dependencies, with, of course, "liberty" (no options) and JS for none (it doesn't do JS).
+
+Currently it supports hardcoding:
+
+- favicons
+- images
+
+## Usage
+
+```sh
+python3 owug.py 'https://example.com' 'out.html'
+```
+
+## License
+
+OWUG is licensed under `GPL-3.0-only`.
+
+## TODO
+
+- Add CSS support
+- Add support for making relative links into absolute ones (`./a.html` -> `https://example.com/a.html`)
diff --git a/owug.py b/owug.py
new file mode 100644
index 0000000..c20e32f
--- /dev/null
+++ b/owug.py
@@ -0,0 +1,79 @@
+'''
+Hardcodes a webpage's favicons and images into one self-contained HTML file
+'''
+import base64
+import sys
+from urllib.parse import urljoin
+
+from bs4 import BeautifulSoup
+import requests
+
+# seconds to wait for any single HTTP request
+TIMEOUT = 30
+
+
+def absolute_url(url, base_domain):
+    '''
+    Makes any URL into an absolute URL (i.e. not a relative link)
+
+    `base_domain` may be a bare origin or a full page URL; relative paths are
+    resolved against it the way a browser would. URLs that already have a
+    scheme (http:, https:, data:, ...) are returned unchanged.
+    '''
+    return urljoin(base_domain, url)
+
+
+def to_data_url(url):
+    '''
+    Downloads `url` and returns its content as a base64 data: URL
+    '''
+    response = requests.get(url, timeout=TIMEOUT)
+    response.raise_for_status()
+    # fall back to a generic type if the server sends no Content-Type
+    mime_type = response.headers.get('Content-Type', 'application/octet-stream')
+    # e.g. "image/png; charset=binary" -> "image/png;charset=binary"
+    mime_type = mime_type.replace(' ', '')
+    as_base64 = base64.b64encode(response.content).decode()
+    return f'data:{mime_type};base64,{as_base64}'
+
+
+def hardcode(tag, attr, base_url):
+    '''
+    Replaces the URL in `tag`'s `attr` with a data: URL of the resource itself
+    '''
+    url = tag.attrs.get(attr)
+    # skip tags without the attribute and resources that are already inline
+    if not url or url.startswith('data:'):
+        return
+    try:
+        tag.attrs[attr] = to_data_url(absolute_url(url, base_url))
+    except requests.RequestException as err:
+        # one broken resource should not abort the whole page
+        print(f'warning: could not fetch {url}: {err}', file=sys.stderr)
+
+
+def main():
+    if len(sys.argv) != 3:
+        sys.exit(f'usage: {sys.argv[0]} URL OUTPUT_FILE')
+
+    response = requests.get(sys.argv[1], timeout=TIMEOUT)
+    response.raise_for_status()
+    # resolve relative links against the final URL, after any redirects
+    base_url = response.url
+    # pass the raw bytes so the parser detects the page's encoding itself
+    soup = BeautifulSoup(response.content, 'html5lib')
+
+    # hardcode favicon
+    for favicon in soup.find_all('link', rel='icon'):
+        hardcode(favicon, 'href', base_url)
+
+    # hardcode images
+    for img in soup.find_all('img'):
+        hardcode(img, 'src', base_url)
+
+    with open(sys.argv[2], 'w', encoding='utf-8') as f:
+        f.write(str(soup))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3836f55
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+bs4
+requests
+html5lib
\ No newline at end of file