initial commit
This commit is contained in:
commit
3e3816b72d
4 changed files with 82 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
/.venv
|
27
README.md
Normal file
27
README.md
Normal file
|
@ -0,0 +1,27 @@
|
|||
# One Webpage, Under God
|
||||
|
||||
With "liberty" and JS for none.
|
||||
|
||||
---
|
||||
|
||||
It's a thing to make a webpage into a single file with no external dependencies, with, of course, "liberty" (no options) and JS for none (it doesn't do JS).
|
||||
|
||||
Currently it supports hardcoding:
|
||||
|
||||
- favicons
|
||||
- images
|
||||
|
||||
## Usage
|
||||
|
||||
```sh
|
||||
python3 owug.py 'https://example.com' 'out.html'
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
OWUG is licensed under `GPL-3.0-only`.
|
||||
|
||||
## TODO
|
||||
|
||||
- Add CSS support
|
||||
- Add support for making relative links into absolute ones (`/a.html` -> `https://example.com/a.html`)
|
51
owug.py
Normal file
51
owug.py
Normal file
|
@ -0,0 +1,51 @@
|
|||
import sys
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
import base64
|
||||
|
||||
|
||||
def absolute_url(url, base_domain):
    '''
    Return *url* as an absolute URL (i.e. not a relative link).

    Relative references are resolved against *base_domain*, which is the
    page's origin (e.g. ``https://example.com``); already-absolute URLs
    are returned unchanged.

    :param url: href/src value taken from the page — may be absolute
                (``https://…``), root-relative (``/a.png``), bare-relative
                (``a.png``) or protocol-relative (``//cdn/a.png``).
    :param base_domain: scheme + host of the fetched page, no trailing path.
    :return: an absolute URL string.
    '''
    # urljoin correctly resolves every reference form above; the previous
    # string-prefix logic broke protocol-relative URLs ("//cdn/x" became
    # "https://example.com//cdn/x"), whereas urljoin gives them the
    # base's scheme instead.
    from urllib.parse import urljoin
    return urljoin(base_domain, url)
|
||||
|
||||
|
||||
# The page's origin: the link without the path and stuff,
# e.g. https://google.com/aaaaaa -> https://google.com
# urlsplit replaces the fragile hand-rolled slash counting, which assumed
# an 8-character scheme prefix.
from urllib.parse import urlsplit

_split = urlsplit(sys.argv[1])
domain_thing = f'{_split.scheme}://{_split.netloc}'


def _as_data_uri(url):
    '''
    Fetch *url* and return its content inlined as a ``data:`` URI.

    A single GET supplies both the Content-Type header and the body
    (the old code issued a separate HEAD first — two requests per asset).
    '''
    response = requests.get(url)
    # Fall back to the generic binary type if the server omits the header.
    mime_type = response.headers.get('Content-Type', 'application/octet-stream')
    as_base64 = base64.b64encode(response.content).decode()
    return f'data:{mime_type};base64,{as_base64}'


html = requests.get(sys.argv[1]).content.decode()
soup = BeautifulSoup(html, 'html5lib')

# Hardcode favicons: replace each icon <link>'s href with an inline data: URI.
for favicon in soup.find_all('link', rel='icon'):
    # rel is a multi-valued attribute (e.g. ["shortcut", "icon"]); keep the
    # original's explicit membership check on the list.
    if favicon.attrs['rel'].count('icon') > 0:
        url = absolute_url(favicon.attrs['href'], domain_thing)
        favicon.attrs['href'] = _as_data_uri(url)

# Hardcode images: replace each <img>'s src with an inline data: URI.
for item in soup.find_all('img'):
    src = item.attrs.get('src')
    if src:  # an <img> with no src would previously raise KeyError
        item.attrs['src'] = _as_data_uri(absolute_url(src, domain_thing))

with open(sys.argv[2], 'wt') as f:
    f.write(str(soup))
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
|
@ -0,0 +1,3 @@
|
|||
bs4
|
||||
requests
|
||||
html5lib
|
Loading…
Reference in a new issue