Cache attachments and actor icons

Fixes #17
This commit is contained in:
Thomas Sileo 2018-07-05 22:27:29 +02:00
parent a2a64a54fd
commit e8ee900c60
4 changed files with 134 additions and 29 deletions

42
app.py
View file

@ -37,10 +37,12 @@ from passlib.hash import bcrypt
from u2flib_server import u2f from u2flib_server import u2f
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
import activitypub import activitypub
import config import config
from activitypub import Box from activitypub import Box
from activitypub import embed_collection from activitypub import embed_collection
from config import USER_AGENT
from config import ADMIN_API_KEY from config import ADMIN_API_KEY
from config import BASE_URL from config import BASE_URL
from config import DB from config import DB
@ -72,10 +74,12 @@ from little_boxes.httpsig import verify_request
from little_boxes.webfinger import get_actor_url from little_boxes.webfinger import get_actor_url
from little_boxes.webfinger import get_remote_follow_template from little_boxes.webfinger import get_remote_follow_template
from utils.img import ImageCache from utils.img import ImageCache
from utils.img import Kind
from utils.key import get_secret_key from utils.key import get_secret_key
from utils.object_service import ObjectService from utils.object_service import ObjectService
from typing import Optional
IMAGE_CACHE = ImageCache(GRIDFS) IMAGE_CACHE = ImageCache(GRIDFS, USER_AGENT)
OBJECT_SERVICE = ACTOR_SERVICE = ObjectService() OBJECT_SERVICE = ACTOR_SERVICE = ObjectService()
@ -185,28 +189,33 @@ def clean_html(html):
return bleach.clean(html, tags=ALLOWED_TAGS) return bleach.clean(html, tags=ALLOWED_TAGS)
_GRIDFS_CACHE: Dict[Tuple[str, int], str] = {} _GRIDFS_CACHE: Dict[Tuple[Kind, str, Optional[int]], str] = {}
def _get_actor_icon_url(url, size): def _get_file_url(url, size, kind):
k = (url, size) k = (kind, url, size)
cached = _GRIDFS_CACHE.get(k) cached = _GRIDFS_CACHE.get(k)
if cached: if cached:
return cached return cached
doc = IMAGE_CACHE.fs.find_one({"url": url, "size": size}) doc = IMAGE_CACHE.get_file(url, size, kind)
if doc: if doc:
u = f"/img/{str(doc._id)}" u = f"/img/{str(doc._id)}"
_GRIDFS_CACHE[k] = u _GRIDFS_CACHE[k] = u
return u return u
IMAGE_CACHE.cache_actor_icon(url) IMAGE_CACHE.cache(url, kind)
return _get_actor_icon_url(url, size) return _get_file_url(url, size, kind)
@app.template_filter() @app.template_filter()
def get_actor_icon_url(url, size): def get_actor_icon_url(url, size):
return _get_actor_icon_url(url, size) return _get_file_url(url, size, Kind.ACTOR_ICON)
@app.template_filter()
def get_attachment_url(url, size):
    """Template filter: resolve the locally cached URL of an attachment.

    Delegates to `_get_file_url` with `Kind.ATTACHMENT`. The templates in
    this change pass `None` as `size` for the original file and `720` for
    the thumbnail.
    """
    attachment_kind = Kind.ATTACHMENT
    return _get_file_url(url, size, attachment_kind)
@app.template_filter() @app.template_filter()
@ -543,6 +552,23 @@ def tmp_migrate2():
return "Done" return "Done"
@app.route("/migration2")
@login_required
def tmp_migrate3():
    """Backfill the image cache from every stored activity.

    Iterates over `DB.activities`, caching the actor icon of each activity
    and, for `Create` activities, every attachment listed on the created
    object. A failure on one activity is logged and the loop continues, so
    a single bad document does not abort the migration.

    Returns the string "Done" once all activities have been visited.
    """
    for doc in DB.activities.find():
        try:
            # Keep the raw document and the parsed activity under separate
            # names instead of rebinding the loop variable.
            activity = ap.parse_activity(doc["activity"])
            actor = activity.get_actor()
            if actor.icon:
                IMAGE_CACHE.cache(actor.icon["url"], Kind.ACTOR_ICON)
            if activity.type == ActivityType.CREATE.value:
                for attachment in activity.get_object()._data.get("attachment", []):
                    IMAGE_CACHE.cache(attachment["url"], Kind.ATTACHMENT)
        except Exception:
            # Best-effort migration: log and move on. Catching `Exception`
            # rather than using a bare `except` lets KeyboardInterrupt and
            # SystemExit propagate.
            app.logger.exception('failed')
    return "Done"
@app.route("/") @app.route("/")
def index(): def index():
if is_api_request(): if is_api_request():

View file

@ -4,7 +4,7 @@
<meta charset="utf-8"> <meta charset="utf-8">
<meta http-equiv="x-ua-compatible" content="ie=edge"> <meta http-equiv="x-ua-compatible" content="ie=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"> <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>{% block title %}{{ config.NAME }}{% endblock %} - microblog.pub</title> <title>{% block title %}{{ config.NAME }}{% endblock %}'s microblog</title>
<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.0/build/pure-min.css" integrity="sha384-nn4HPE8lTHyVtfCBi5yW9d20FjT8BJwUXyWZT9InLYax14RDjBj46LmSztkmNP9w" crossorigin="anonymous"> <link rel="stylesheet" href="https://unpkg.com/purecss@1.0.0/build/pure-min.css" integrity="sha384-nn4HPE8lTHyVtfCBi5yW9d20FjT8BJwUXyWZT9InLYax14RDjBj46LmSztkmNP9w" crossorigin="anonymous">
<link rel="authorization_endpoint" href="{{ config.ID }}/indieauth"> <link rel="authorization_endpoint" href="{{ config.ID }}/indieauth">
<link rel="token_endpoint" href="{{ config.ID }}/token"> <link rel="token_endpoint" href="{{ config.ID }}/token">

View file

@ -48,9 +48,9 @@
{% endif %} {% endif %}
{% for a in obj.attachment %} {% for a in obj.attachment %}
{% if a.url | is_img %} {% if a.url | is_img %}
<img src="{{a.url}}" class="img-attachment"> <a href="{{ a.url | get_attachment_url(None) }}"><img src="{{a.url | get_attachment_url(720) }}" class="img-attachment"></a>
{% else %} {% else %}
<li><a href="{{a.url}}" class="l">{% if a.filename %}{{ a.filename }}{% else %}{{ a.url }}{% endif %}</a></li> <li><a href="{{a.url | get_attachment_url(None) }}" class="l">{% if a.filename %}{{ a.filename }}{% else %}{{ a.url }}{% endif %}</a></li>
{% endif %} {% endif %}
{% endfor %} {% endfor %}
{% if obj.attachment | not_only_imgs %} {% if obj.attachment | not_only_imgs %}

View file

@ -2,22 +2,20 @@ import base64
from gzip import GzipFile from gzip import GzipFile
from io import BytesIO from io import BytesIO
from typing import Any from typing import Any
import mimetypes
from enum import Enum
import gridfs import gridfs
import requests import requests
from PIL import Image from PIL import Image
def load(url): def load(url, user_agent):
"""Initializes a `PIL.Image` from the URL.""" """Initializes a `PIL.Image` from the URL."""
# TODO(tsileo): user agent # TODO(tsileo): user agent
resp = requests.get(url, stream=True) with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
resp.raise_for_status() resp.raise_for_status()
try: return Image.open(BytesIO(resp.raw.read()))
image = Image.open(BytesIO(resp.raw.read()))
finally:
resp.close()
return image
def to_data_uri(img): def to_data_uri(img):
@ -28,25 +26,106 @@ def to_data_uri(img):
return f"data:{img.get_format_mimetype()};base64,{data}" return f"data:{img.get_format_mimetype()};base64,{data}"
class ImageCache(object): class Kind(Enum):
def __init__(self, gridfs_db: str) -> None: ATTACHMENT = "attachment"
self.fs = gridfs.GridFS(gridfs_db) ACTOR_ICON = "actor_icon"
def cache_actor_icon(self, url: str):
if self.fs.find_one({"url": url}): class ImageCache(object):
def __init__(self, gridfs_db: str, user_agent: str) -> None:
self.fs = gridfs.GridFS(gridfs_db)
self.user_agent = user_agent
def cache_attachment(self, url: str) -> None:
if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}):
return return
i = load(url) if (
url.endswith(".png")
or url.endswith(".jpg")
or url.endswith(".jpeg")
or url.endswith(".gif")
):
i = load(url, self.user_agent)
# Save the original attachment (gzipped)
with BytesIO() as buf:
f1 = GzipFile(mode="wb", fileobj=buf)
i.save(f1, format=i.format)
f1.close()
buf.seek(0)
self.fs.put(
buf,
url=url,
size=None,
content_type=i.get_format_mimetype(),
kind=Kind.ATTACHMENT.value,
)
# Save a thumbnail (gzipped)
i.thumbnail((720, 720))
with BytesIO() as buf:
f1 = GzipFile(mode="wb", fileobj=buf)
i.save(f1, format=i.format)
f1.close()
buf.seek(0)
self.fs.put(
buf,
url=url,
size=720,
content_type=i.get_format_mimetype(),
kind=Kind.ATTACHMENT.value,
)
return
# The attachment is not an image, download and save it anyway
with requests.get(
url, stream=True, headers={"User-Agent": self.user_agent}
) as resp:
resp.raise_for_status()
with BytesIO() as buf:
f1 = GzipFile(mode="wb", fileobj=buf)
for chunk in resp.iter_content():
if chunk:
f1.write(chunk)
f1.close()
buf.seek(0)
self.fs.put(
buf,
url=url,
size=None,
content_type=mimetypes.guess_type(url)[0],
kind=Kind.ATTACHMENT.value,
)
def cache_actor_icon(self, url: str) -> None:
if self.fs.find_one({"url": url, "kind": Kind.ACTOR_ICON.value}):
return
i = load(url, self.user_agent)
for size in [50, 80]: for size in [50, 80]:
t1 = i.copy() t1 = i.copy()
t1.thumbnail((size, size)) t1.thumbnail((size, size))
with BytesIO() as buf: with BytesIO() as buf:
f1 = GzipFile(mode='wb', fileobj=buf) f1 = GzipFile(mode="wb", fileobj=buf)
t1.save(f1, format=i.format) t1.save(f1, format=i.format)
f1.close() f1.close()
buf.seek(0) buf.seek(0)
self.fs.put( self.fs.put(
buf, url=url, size=size, content_type=i.get_format_mimetype() buf,
url=url,
size=size,
content_type=i.get_format_mimetype(),
kind=Kind.ACTOR_ICON.value,
) )
def get_file(self, url: str, size: int) -> Any: def cache(self, url: str, kind: Kind) -> None:
return self.fs.find_one({"url": url, "size": size}) if kind == Kind.ACTOR_ICON:
self.cache_actor_icon(url)
else:
self.cache_attachment(url)
def get_actor_icon(self, url: str, size: int) -> Any:
    """Look up the cached actor icon stored for `url` at the given `size`.

    Returns whatever `get_file` returns for `Kind.ACTOR_ICON`.
    """
    # The class defines `get_file`, not `_get_file`; calling the latter
    # would raise AttributeError.
    return self.get_file(url, size, Kind.ACTOR_ICON)
def get_attachment(self, url: str, size: int) -> Any:
    """Look up the cached attachment stored for `url` at the given `size`.

    Returns whatever `get_file` returns for `Kind.ATTACHMENT`.
    """
    # The class defines `get_file`, not `_get_file`; calling the latter
    # would raise AttributeError.
    return self.get_file(url, size, Kind.ATTACHMENT)
def get_file(self, url: str, size: int, kind: Kind) -> Any:
    """Find the GridFS entry matching `url`, `size` and `kind`.

    The lookup filters on the `url`, `size` and `kind` fields, using
    `kind.value` for the latter, and returns the result of
    `self.fs.find_one`.
    """
    query = {"url": url, "size": size, "kind": kind.value}
    return self.fs.find_one(query)