# Patch 13c63e473aa32cf5640b6a647ef1db3b5f6a2f7d
# From: Thomas Sileo
# Date: Thu, 5 Jul 2018 01:02:51 +0200
# Subject: [PATCH] Start to cache actor icon
#
# Files touched by the patch: app.py, config.py, requirements.txt,
# templates/utils.html, utils/img.py (new file).
#
# ---------------------------------------------------------------------------
# app.py -- code added by the patch.
# `app` and `ObjectId` are used below but are defined/imported in parts of
# app.py that the patch does not show.
# ---------------------------------------------------------------------------
from typing import Dict
from typing import Tuple

from config import GRIDFS
from utils.img import ImageCache

IMAGE_CACHE = ImageCache(GRIDFS)

# Process-local memo of (remote icon URL, size) -> local "/img/<id>" path, so
# repeated template renders do not query GridFS again.
_GRIDFS_CACHE: Dict[Tuple[str, int], str] = {}


def _find_cached_icon_path(url, size):
    """Return the local ``/img/<id>`` path of a stored thumbnail, or ``None``."""
    doc = IMAGE_CACHE.fs.find_one({"url": url, "size": size})
    if doc is None:
        return None
    return f"/img/{str(doc._id)}"


def _get_actor_icon_url(url, size):
    """Return a local URL serving the cached thumbnail of a remote actor icon.

    Lookup order: the in-process memo, then GridFS, then (after asking
    ``IMAGE_CACHE.cache_actor_icon`` to fetch and store the icon) GridFS once
    more.

    Args:
        url: Remote URL of the actor icon.
        size: Thumbnail size (pixels) to look up.

    Returns:
        A ``/img/<id>`` path for the cached thumbnail, or ``url`` itself when
        no thumbnail of that size is available even after caching.

    Raises:
        Whatever ``IMAGE_CACHE.cache_actor_icon`` raises (e.g. on a failed
        download); such errors are not swallowed here.
    """
    key = (url, size)
    cached = _GRIDFS_CACHE.get(key)
    if cached:
        return cached

    path = _find_cached_icon_path(url, size)
    if path is None:
        IMAGE_CACHE.cache_actor_icon(url)
        # Look up exactly once more instead of recursing: the cache only
        # stores a fixed set of sizes, so an unsupported `size` would
        # otherwise recurse until RecursionError.
        path = _find_cached_icon_path(url, size)

    if path is None:
        # Best effort: let the page reference the remote icon directly.
        return url

    _GRIDFS_CACHE[key] = path
    return path


@app.template_filter()
def get_actor_icon_url(url, size):
    """Template filter wrapper around `_get_actor_icon_url`."""
    return _get_actor_icon_url(url, size)


# App routes (added before the "Login" section of app.py)


@app.route("/img/<img_id>")
def serve_img(img_id):
    """Serve a cached image from GridFS by its file id.

    The stored bytes are gzip-compressed, hence the fixed
    ``Content-Encoding: gzip`` header. Responses are marked immutable with a
    one-year max-age.

    Args:
        img_id: String form of the GridFS file ``_id`` (taken from the URL).

    Returns:
        The image response, or an empty 404 response when ``img_id`` is not a
        valid ObjectId or no such file is stored.
    """
    # `img_id` comes straight from the URL: validate it before building an
    # ObjectId so a malformed id yields 404 rather than an unhandled error.
    if not ObjectId.is_valid(img_id):
        return app.response_class(status=404)

    f = IMAGE_CACHE.fs.find_one({"_id": ObjectId(img_id)})
    if f is None:
        return app.response_class(status=404)

    resp = app.response_class(f, direct_passthrough=True, mimetype=f.content_type)
    resp.headers.set("Content-Length", f.length)
    resp.headers.set("ETag", f.md5)
    resp.headers.set(
        "Last-Modified", f.uploadDate.strftime("%a, %d %b %Y %H:%M:%S GMT")
    )
    resp.headers.set("Cache-Control", "public,max-age=31536000,immutable")
    resp.headers.set("Content-Encoding", "gzip")
    return resp


# ---------------------------------------------------------------------------
# config.py -- line added after `DB = mongo_client[DB_NAME]`:
#     GRIDFS = mongo_client[f"{DB_NAME}_gridfs"]
#
# requirements.txt -- line added after `pyyaml`:
#     pillow
#
# templates/utils.html -- hunk `@@ -20,7 +20,7 @@` (one line replaced) follows.
# ---------------------------------------------------------------------------
- +
"""Helpers to load remote images and cache actor icon thumbnails in GridFS.

utils/img.py -- new file added by the patch.
"""
import base64
from gzip import GzipFile
from io import BytesIO
from typing import Any

import gridfs
import requests
from PIL import Image


def load(url, timeout=10):
    """Initializes a `PIL.Image` from the URL.

    Args:
        url: URL of the image to download.
        timeout: Seconds passed to ``requests.get`` so a stalled server
            cannot block the caller forever.

    Returns:
        The opened ``PIL.Image``, backed by an in-memory copy of the body.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx status (via ``raise_for_status``).
    """
    # TODO(tsileo): user agent
    resp = requests.get(url, stream=True, timeout=timeout)
    try:
        # Inside the try so the connection is released on HTTP errors too.
        resp.raise_for_status()
        # `raw` skips requests' transparent decoding unless asked; without
        # this a gzip/deflate-encoded body would reach PIL still compressed.
        image = Image.open(BytesIO(resp.raw.read(decode_content=True)))
    finally:
        resp.close()
    return image


def to_data_uri(img):
    """Encode `img` (in its own format) as a base64 ``data:`` URI string."""
    out = BytesIO()
    img.save(out, format=img.format)
    data = base64.b64encode(out.getvalue()).decode("utf-8")
    return f"data:{img.get_format_mimetype()};base64,{data}"


class ImageCache(object):
    """Stores gzip-compressed thumbnails of remote images in GridFS."""

    # Thumbnail bounding-box sizes (pixels) generated for each actor icon.
    ACTOR_ICON_SIZES = (50, 80)

    def __init__(self, gridfs_db: Any) -> None:
        """Wrap `gridfs_db` (a database handle accepted by `gridfs.GridFS`)."""
        self.fs = gridfs.GridFS(gridfs_db)

    def cache_actor_icon(self, url: str) -> None:
        """Download the icon at `url` and store any thumbnail size not yet cached.

        Each thumbnail is saved in the icon's own format, gzip-compressed,
        with `url`, `size` and `content_type` recorded on the GridFS file.
        Does nothing (and performs no download) when every size is already
        stored.

        Raises:
            Whatever `load` raises when the download fails.
        """
        # Check per size rather than "any file for this URL": a run that
        # failed half-way would otherwise never get its missing sizes.
        missing = [
            size
            for size in self.ACTOR_ICON_SIZES
            if self.fs.find_one({"url": url, "size": size}) is None
        ]
        if not missing:
            return

        i = load(url)
        content_type = i.get_format_mimetype()
        for size in missing:
            # thumbnail() works in place, so resize a copy of the original.
            t1 = i.copy()
            t1.thumbnail((size, size))
            with BytesIO() as buf:
                # The context manager flushes the gzip trailer even if save()
                # raises; it does not close `buf`.
                with GzipFile(mode="wb", fileobj=buf) as f1:
                    # copy() does not carry `format`, so use the original's.
                    t1.save(f1, format=i.format)
                buf.seek(0)
                self.fs.put(buf, url=url, size=size, content_type=content_type)

    def get_file(self, url: str, size: int) -> Any:
        """Return the stored GridFS file for (`url`, `size`), or ``None``."""
        return self.fs.find_one({"url": url, "size": size})