Cache attachments and actor icons

Fixes #17
This commit is contained in:
Thomas Sileo 2018-07-05 22:27:29 +02:00
parent a2a64a54fd
commit e8ee900c60
4 changed files with 134 additions and 29 deletions

42
app.py
View file

@ -37,10 +37,12 @@ from passlib.hash import bcrypt
from u2flib_server import u2f
from werkzeug.utils import secure_filename
import activitypub
import config
from activitypub import Box
from activitypub import embed_collection
from config import USER_AGENT
from config import ADMIN_API_KEY
from config import BASE_URL
from config import DB
@ -72,10 +74,12 @@ from little_boxes.httpsig import verify_request
from little_boxes.webfinger import get_actor_url
from little_boxes.webfinger import get_remote_follow_template
from utils.img import ImageCache
from utils.img import Kind
from utils.key import get_secret_key
from utils.object_service import ObjectService
from typing import Optional
# Shared image cache backed by GridFS. The configured user agent is handed to
# the cache so it can be sent with the remote fetches it performs.
IMAGE_CACHE = ImageCache(GRIDFS, USER_AGENT)
# Both names are bound to the same ObjectService instance.
OBJECT_SERVICE = ACTOR_SERVICE = ObjectService()
@ -185,28 +189,33 @@ def clean_html(html):
return bleach.clean(html, tags=ALLOWED_TAGS)
# In-process memo of already resolved local URLs, keyed by
# (kind, remote url, requested size).
_GRIDFS_CACHE: Dict[Tuple[Kind, str, Optional[int]], str] = {}


def _get_file_url(url, size, kind):
    """Return the local `/img/<id>` URL of the cached copy of `url`.

    The file is looked up in `IMAGE_CACHE` by `(url, size, kind)`; when it is
    missing, the remote file is cached first and the lookup is retried once.

    :param url: remote URL of the attachment or actor icon.
    :param size: requested thumbnail size, or `None` for the original file.
    :param kind: `Kind` member selecting which cache namespace to use.
    :return: the local `/img/...` URL, or `url` itself when the cache holds
        no file of the requested size even after caching.
    """
    key = (kind, url, size)
    cached = _GRIDFS_CACHE.get(key)
    if cached:
        return cached

    doc = IMAGE_CACHE.get_file(url, size, kind)
    if not doc:
        # Not cached yet: fetch and store it, then look it up exactly once more.
        IMAGE_CACHE.cache(url, kind)
        doc = IMAGE_CACHE.get_file(url, size, kind)
    if not doc:
        # Caching succeeded but produced no variant of this size (e.g. a
        # thumbnail was requested for a file stored only as an original).
        # Fall back to the remote URL rather than recursing until
        # RecursionError, which is what a self-call here would do.
        return url

    local_url = f"/img/{str(doc._id)}"
    _GRIDFS_CACHE[key] = local_url
    return local_url
@app.template_filter()
def get_actor_icon_url(url, size):
    """Template filter: map a remote actor icon URL to its cached copy.

    :param url: remote URL of the actor icon.
    :param size: requested icon size.
    :return: whatever `_get_file_url` resolves for `Kind.ACTOR_ICON`.
    """
    return _get_file_url(url, size, Kind.ACTOR_ICON)
@app.template_filter()
def get_attachment_url(url, size):
    """Template filter: map a remote attachment URL to its cached copy.

    :param url: remote URL of the attachment.
    :param size: requested thumbnail size, or `None` for the original file.
    :return: whatever `_get_file_url` resolves for `Kind.ATTACHMENT`.
    """
    return _get_file_url(url, size, kind=Kind.ATTACHMENT)
@app.template_filter()
@ -543,6 +552,23 @@ def tmp_migrate2():
return "Done"
@app.route("/migration2")
@login_required
def tmp_migrate3():
    """One-off migration: cache actor icons and attachments of stored activities.

    Walks every document in `DB.activities`, caches the actor icon when the
    actor has one, and caches each attachment of `Create` activities.
    A failure on one activity is logged and does not stop the migration.

    :return: the string "Done" once every activity has been visited.
    """
    # NOTE(review): the route is "/migration2" while the function is named
    # tmp_migrate3 -- confirm it does not collide with an existing route.
    for doc in DB.activities.find():
        try:
            activity = ap.parse_activity(doc["activity"])
            actor = activity.get_actor()
            if actor.icon:
                IMAGE_CACHE.cache(actor.icon["url"], Kind.ACTOR_ICON)
            if activity.type == ActivityType.CREATE.value:
                attachments = activity.get_object()._data.get("attachment", [])
                for attachment in attachments:
                    IMAGE_CACHE.cache(attachment["url"], Kind.ATTACHMENT)
        except Exception:
            # Best effort: keep going, but let SystemExit/KeyboardInterrupt
            # propagate (a bare `except:` would swallow those too).
            app.logger.exception('failed')
    return "Done"
@app.route("/")
def index():
if is_api_request():

View file

@ -4,7 +4,7 @@
<meta charset="utf-8">
<meta http-equiv="x-ua-compatible" content="ie=edge">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<title>{% block title %}{{ config.NAME }}{% endblock %} - microblog.pub</title>
<title>{% block title %}{{ config.NAME }}{% endblock %}'s microblog</title>
<link rel="stylesheet" href="https://unpkg.com/purecss@1.0.0/build/pure-min.css" integrity="sha384-nn4HPE8lTHyVtfCBi5yW9d20FjT8BJwUXyWZT9InLYax14RDjBj46LmSztkmNP9w" crossorigin="anonymous">
<link rel="authorization_endpoint" href="{{ config.ID }}/indieauth">
<link rel="token_endpoint" href="{{ config.ID }}/token">

View file

@ -48,9 +48,9 @@
{% endif %}
{% for a in obj.attachment %}
{% if a.url | is_img %}
<img src="{{a.url}}" class="img-attachment">
<a href="{{ a.url | get_attachment_url(None) }}"><img src="{{a.url | get_attachment_url(720) }}" class="img-attachment"></a>
{% else %}
<li><a href="{{a.url}}" class="l">{% if a.filename %}{{ a.filename }}{% else %}{{ a.url }}{% endif %}</a></li>
<li><a href="{{a.url | get_attachment_url(None) }}" class="l">{% if a.filename %}{{ a.filename }}{% else %}{{ a.url }}{% endif %}</a></li>
{% endif %}
{% endfor %}
{% if obj.attachment | not_only_imgs %}

View file

@ -2,22 +2,20 @@ import base64
from gzip import GzipFile
from io import BytesIO
from typing import Any
import mimetypes
from enum import Enum
import gridfs
import requests
from PIL import Image
def load(url, user_agent):
    """Initializes a `PIL.Image` from the URL.

    :param url: remote URL of the image to download.
    :param user_agent: value sent as the `User-Agent` request header.
    :return: the image opened from the downloaded bytes.
    :raises requests.HTTPError: when the server answers with an error status.
    """
    with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
        resp.raise_for_status()
        # `resp.content` has any Content-Encoding (gzip/deflate) undone, whereas
        # `resp.raw.read()` hands back the bytes exactly as sent on the wire.
        return Image.open(BytesIO(resp.content))
def to_data_uri(img):
@ -28,25 +26,106 @@ def to_data_uri(img):
return f"data:{img.get_format_mimetype()};base64,{data}"
class Kind(Enum):
    """Category of a cached file; the value is stored in the `kind` field."""

    ATTACHMENT = "attachment"
    ACTOR_ICON = "actor_icon"


class ImageCache(object):
    """GridFS-backed cache of remote attachments and actor icons.

    Every stored file is gzip-compressed and tagged with `url`, `size`
    (`None` for an original, otherwise the thumbnail bound), `content_type`
    and `kind` metadata.
    """

    # URL suffixes handled as images (matched case-sensitively on the URL).
    _IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".gif")
    # Bounding box edge of the attachment thumbnail.
    ATTACHMENT_THUMBNAIL_SIZE = 720
    # Bounding box edges of the actor icon thumbnails.
    ACTOR_ICON_SIZES = (50, 80)

    def __init__(self, gridfs_db: str, user_agent: str) -> None:
        """Open the GridFS store and remember the user agent used for fetches."""
        self.fs = gridfs.GridFS(gridfs_db)
        self.user_agent = user_agent

    def _put_gzipped(self, write, **metadata: Any) -> None:
        """Gzip what `write(fileobj)` emits and store it with `metadata`."""
        with BytesIO() as buf:
            # Leaving the inner `with` flushes the gzip trailer into `buf`
            # (also on error) before the buffer is rewound and stored.
            with GzipFile(mode="wb", fileobj=buf) as gz:
                write(gz)
            buf.seek(0)
            self.fs.put(buf, **metadata)

    def _put_image(self, img: Any, fmt: Any, **metadata: Any) -> None:
        """Encode `img` as `fmt` and store it gzipped with `metadata`."""
        self._put_gzipped(lambda out: img.save(out, format=fmt), **metadata)

    def cache_attachment(self, url: str) -> None:
        """Download and store the attachment at `url` unless already cached.

        Image URLs get two entries: the original (`size=None`) and a
        thumbnail (`size=720`). Any other file is stored once, as downloaded,
        with a content type guessed from the URL.
        """
        if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}):
            return

        if url.endswith(self._IMAGE_EXTENSIONS):
            img = load(url, self.user_agent)
            fmt = img.format
            content_type = img.get_format_mimetype()
            # Save the original attachment (gzipped)
            self._put_image(
                img,
                fmt,
                url=url,
                size=None,
                content_type=content_type,
                kind=Kind.ATTACHMENT.value,
            )
            # Save a thumbnail (gzipped); `thumbnail` resizes `img` in place.
            bound = self.ATTACHMENT_THUMBNAIL_SIZE
            img.thumbnail((bound, bound))
            self._put_image(
                img,
                fmt,
                url=url,
                size=bound,
                content_type=content_type,
                kind=Kind.ATTACHMENT.value,
            )
            return

        # The attachment is not an image, download and save it anyway
        with requests.get(
            url, stream=True, headers={"User-Agent": self.user_agent}
        ) as resp:
            resp.raise_for_status()

            def write_body(out):
                # An explicit chunk size avoids iter_content's 1-byte default.
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        out.write(chunk)

            self._put_gzipped(
                write_body,
                url=url,
                size=None,
                content_type=mimetypes.guess_type(url)[0],
                kind=Kind.ATTACHMENT.value,
            )

    def cache_actor_icon(self, url: str) -> None:
        """Download the icon at `url` and store a thumbnail per icon size.

        Does nothing when an actor icon for `url` is already stored.
        """
        if self.fs.find_one({"url": url, "kind": Kind.ACTOR_ICON.value}):
            return

        img = load(url, self.user_agent)
        # Format and mimetype are taken from the downloaded image, not from
        # the resized copies.
        fmt = img.format
        content_type = img.get_format_mimetype()
        for size in self.ACTOR_ICON_SIZES:
            thumb = img.copy()
            thumb.thumbnail((size, size))
            self._put_image(
                thumb,
                fmt,
                url=url,
                size=size,
                content_type=content_type,
                kind=Kind.ACTOR_ICON.value,
            )

    def cache(self, url: str, kind: Kind) -> None:
        """Cache `url` as an actor icon or, for any other kind, an attachment."""
        if kind == Kind.ACTOR_ICON:
            self.cache_actor_icon(url)
        else:
            self.cache_attachment(url)

    def get_actor_icon(self, url: str, size: int) -> Any:
        """Return the stored actor icon for `url` at `size`, or `None`."""
        return self.get_file(url, size, Kind.ACTOR_ICON)

    def get_attachment(self, url: str, size: int) -> Any:
        """Return the stored attachment for `url` at `size`, or `None`.

        Originals are stored with `size=None`, so pass `None` to get one.
        """
        return self.get_file(url, size, Kind.ATTACHMENT)

    def get_file(self, url: str, size: int, kind: Kind) -> Any:
        """Return the stored file matching `url`, `size` and `kind`, or `None`."""
        return self.fs.find_one({"url": url, "size": size, "kind": kind.value})