diff --git a/app.py b/app.py
index 16ed99b..b35e05d 100644
--- a/app.py
+++ b/app.py
@@ -37,10 +37,12 @@ from passlib.hash import bcrypt
from u2flib_server import u2f
from werkzeug.utils import secure_filename
+
import activitypub
import config
from activitypub import Box
from activitypub import embed_collection
+from config import USER_AGENT
from config import ADMIN_API_KEY
from config import BASE_URL
from config import DB
@@ -72,10 +74,12 @@ from little_boxes.httpsig import verify_request
from little_boxes.webfinger import get_actor_url
from little_boxes.webfinger import get_remote_follow_template
from utils.img import ImageCache
+from utils.img import Kind
from utils.key import get_secret_key
from utils.object_service import ObjectService
+from typing import Optional
-IMAGE_CACHE = ImageCache(GRIDFS)
+IMAGE_CACHE = ImageCache(GRIDFS, USER_AGENT)
OBJECT_SERVICE = ACTOR_SERVICE = ObjectService()
@@ -185,28 +189,33 @@ def clean_html(html):
return bleach.clean(html, tags=ALLOWED_TAGS)
-_GRIDFS_CACHE: Dict[Tuple[str, int], str] = {}
+_GRIDFS_CACHE: Dict[Tuple[Kind, str, Optional[int]], str] = {}
-def _get_actor_icon_url(url, size):
- k = (url, size)
+def _get_file_url(url, size, kind, _retry=True):
+    """Return the local `/img/<id>` URL of the cached copy of `url`.
+
+    Lookups are memoized in `_GRIDFS_CACHE`, keyed by `(kind, url, size)`.
+    On a miss the file is cached via `IMAGE_CACHE.cache(url, kind)` and the
+    lookup is retried exactly once.
+
+    `_retry` is internal: it stops the retry from recursing forever when
+    caching does not produce a file for the requested `size` (for example a
+    non-image attachment, which is only stored with `size=None`). In that
+    case the original remote `url` is returned and nothing is memoized.
+    """
+    k = (kind, url, size)
     cached = _GRIDFS_CACHE.get(k)
     if cached:
         return cached
-    doc = IMAGE_CACHE.fs.find_one({"url": url, "size": size})
+    doc = IMAGE_CACHE.get_file(url, size, kind)
     if doc:
         u = f"/img/{str(doc._id)}"
         _GRIDFS_CACHE[k] = u
         return u
-    IMAGE_CACHE.cache_actor_icon(url)
-    return _get_actor_icon_url(url, size)
+    if not _retry:
+        # Caching already ran and still produced nothing for this
+        # (kind, size): fall back to the remote URL instead of recursing.
+        return url
+    IMAGE_CACHE.cache(url, kind)
+    return _get_file_url(url, size, kind, _retry=False)
@app.template_filter()
def get_actor_icon_url(url, size):
+ """Template filter: map a remote actor icon `url` to its locally cached URL.
+
+ Delegates to `_get_file_url` with `Kind.ACTOR_ICON`. Actor icons are
+ stored by `ImageCache.cache_actor_icon` at sizes 50 and 80.
+ """
- return _get_actor_icon_url(url, size)
+ return _get_file_url(url, size, Kind.ACTOR_ICON)
+
+
+@app.template_filter()
+def get_attachment_url(url, size):
+ """Template filter: map a remote attachment `url` to its locally cached URL.
+
+ Delegates to `_get_file_url` with `Kind.ATTACHMENT`. Attachments are
+ stored by `ImageCache.cache_attachment` with `size=None` (original) and,
+ for images only, `size=720` (thumbnail).
+ """
+ return _get_file_url(url, size, Kind.ATTACHMENT)
@app.template_filter()
@@ -543,6 +552,23 @@ def tmp_migrate2():
return "Done"
+@app.route("/migration2")
+@login_required
+def tmp_migrate3():
+    """Warm the image cache from every stored activity.
+
+    For each document in `DB.activities`, caches the actor's icon (when the
+    actor has one) and, for Create activities, every attachment of the
+    created object. A failure on one activity is logged and the loop moves
+    on to the next one. Always returns the string "Done".
+    """
+    # NOTE(review): the route is "/migration2" although the function is
+    # `tmp_migrate3` — confirm it does not collide with `tmp_migrate2`'s route.
+    for doc in DB.activities.find():
+        try:
+            activity = ap.parse_activity(doc["activity"])
+            actor = activity.get_actor()
+            if actor.icon:
+                IMAGE_CACHE.cache(actor.icon["url"], Kind.ACTOR_ICON)
+            if activity.type == ActivityType.CREATE.value:
+                for attachment in activity.get_object()._data.get("attachment", []):
+                    IMAGE_CACHE.cache(attachment["url"], Kind.ATTACHMENT)
+        except Exception:
+            # Best-effort migration: log and continue. `Exception` (not a bare
+            # `except`) lets KeyboardInterrupt/SystemExit stop the loop.
+            app.logger.exception('failed')
+    return "Done"
+
+
@app.route("/")
def index():
if is_api_request():
diff --git a/templates/layout.html b/templates/layout.html
index d9bdfd0..b6c8d3e 100644
--- a/templates/layout.html
+++ b/templates/layout.html
@@ -4,7 +4,7 @@
-
{% block title %}{{ config.NAME }}{% endblock %} - microblog.pub
+{% block title %}{{ config.NAME }}{% endblock %}'s microblog
diff --git a/templates/utils.html b/templates/utils.html
index a00935a..0b158c3 100644
--- a/templates/utils.html
+++ b/templates/utils.html
@@ -48,9 +48,9 @@
{% endif %}
{% for a in obj.attachment %}
{% if a.url | is_img %}
-
+
{% else %}
- {% if a.filename %}{{ a.filename }}{% else %}{{ a.url }}{% endif %}
+ {% if a.filename %}{{ a.filename }}{% else %}{{ a.url }}{% endif %}
{% endif %}
{% endfor %}
{% if obj.attachment | not_only_imgs %}
diff --git a/utils/img.py b/utils/img.py
index 2b08e58..dfe43d6 100644
--- a/utils/img.py
+++ b/utils/img.py
@@ -2,22 +2,20 @@ import base64
from gzip import GzipFile
from io import BytesIO
from typing import Any
+import mimetypes
+from enum import Enum
import gridfs
import requests
from PIL import Image
-def load(url):
+def load(url, user_agent):
"""Initializes a `PIL.Image` from the URL."""
# TODO(tsileo): user agent
+ # NOTE: the TODO above is addressed below: `user_agent` is sent as the
+ # User-Agent request header.
- resp = requests.get(url, stream=True)
- resp.raise_for_status()
- try:
- image = Image.open(BytesIO(resp.raw.read()))
- finally:
- resp.close()
- return image
+ with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
+ # Non-2xx responses raise here, before any image parsing.
+ resp.raise_for_status()
+ # The body is read fully into an in-memory buffer; the response is
+ # closed when the `with` block exits.
+ # NOTE(review): `resp.raw` is the raw stream — presumably no
+ # Content-Encoding is applied to image responses; confirm.
+ return Image.open(BytesIO(resp.raw.read()))
def to_data_uri(img):
@@ -28,25 +26,106 @@ def to_data_uri(img):
return f"data:{img.get_format_mimetype()};base64,{data}"
-class ImageCache(object):
- def __init__(self, gridfs_db: str) -> None:
- self.fs = gridfs.GridFS(gridfs_db)
+class Kind(Enum):
+ """Category of a cached file.
+
+ The member's string value is written as the `kind` field of each file
+ stored by `ImageCache` and is part of every lookup query.
+ """
+ # Files attached to an object (original plus a 720px thumbnail for images).
+ ATTACHMENT = "attachment"
+ # Actor avatars, stored as 50px and 80px thumbnails.
+ ACTOR_ICON = "actor_icon"
- def cache_actor_icon(self, url: str):
- if self.fs.find_one({"url": url}):
+
+class ImageCache(object):
+ def __init__(self, gridfs_db: str, user_agent: str) -> None:
+ """Wrap `gridfs_db` in a `gridfs.GridFS` and remember the User-Agent.
+
+ `user_agent` is sent as the User-Agent header on every download made
+ by this cache.
+ """
+ # NOTE(review): `gridfs_db` is annotated `str` but is passed straight to
+ # `gridfs.GridFS(...)` — confirm the intended type.
+ self.fs = gridfs.GridFS(gridfs_db)
+ self.user_agent = user_agent
+
+    def cache_attachment(self, url: str) -> None:
+        """Download `url` and store it in GridFS as an attachment.
+
+        Does nothing when a file with this `url` and the attachment kind is
+        already stored. URLs ending in .png/.jpg/.jpeg/.gif are loaded as
+        images and stored twice: the original (`size=None`) and a thumbnail
+        bounded by 720x720 (`size=720`). Any other URL is downloaded as-is and
+        stored once with `size=None` and a content type guessed from the URL.
+        Every payload is gzip-compressed before being stored.
+
+        Download and HTTP status errors are not caught here; they propagate
+        to the caller.
+        """
+        if self.fs.find_one({"url": url, "kind": Kind.ATTACHMENT.value}):
             return
-        i = load(url)
+        if url.endswith((".png", ".jpg", ".jpeg", ".gif")):
+            i = load(url, self.user_agent)
+            # Save the original attachment (gzipped)
+            with BytesIO() as buf:
+                # Closing the GzipFile flushes the gzip trailer into `buf`;
+                # `with` guarantees that even if `save` raises.
+                with GzipFile(mode="wb", fileobj=buf) as f1:
+                    i.save(f1, format=i.format)
+                buf.seek(0)
+                self.fs.put(
+                    buf,
+                    url=url,
+                    size=None,
+                    content_type=i.get_format_mimetype(),
+                    kind=Kind.ATTACHMENT.value,
+                )
+            # Save a thumbnail (gzipped); `thumbnail` resizes `i` in place,
+            # which is why the original is stored first.
+            i.thumbnail((720, 720))
+            with BytesIO() as buf:
+                with GzipFile(mode="wb", fileobj=buf) as f1:
+                    i.save(f1, format=i.format)
+                buf.seek(0)
+                self.fs.put(
+                    buf,
+                    url=url,
+                    size=720,
+                    content_type=i.get_format_mimetype(),
+                    kind=Kind.ATTACHMENT.value,
+                )
+            return
+
+        # The attachment is not an image, download and save it anyway
+        with requests.get(
+            url, stream=True, headers={"User-Agent": self.user_agent}
+        ) as resp:
+            resp.raise_for_status()
+            with BytesIO() as buf:
+                with GzipFile(mode="wb", fileobj=buf) as f1:
+                    # An explicit chunk size avoids iter_content's default of
+                    # one byte per iteration.
+                    for chunk in resp.iter_content(chunk_size=8192):
+                        if chunk:
+                            f1.write(chunk)
+                buf.seek(0)
+                self.fs.put(
+                    buf,
+                    url=url,
+                    size=None,
+                    content_type=mimetypes.guess_type(url)[0],
+                    kind=Kind.ATTACHMENT.value,
+                )
+
+ def cache_actor_icon(self, url: str) -> None:
+ """Download the icon at `url` and store 50px and 80px gzipped thumbnails.
+
+ Does nothing when a file with this `url` and the actor-icon kind is
+ already stored. Download errors raised by `load` propagate to the caller.
+ """
+ if self.fs.find_one({"url": url, "kind": Kind.ACTOR_ICON.value}):
+ return
+ i = load(url, self.user_agent)
for size in [50, 80]:
+ # Resize a copy so every size is derived from the original image.
t1 = i.copy()
t1.thumbnail((size, size))
with BytesIO() as buf:
- f1 = GzipFile(mode='wb', fileobj=buf)
+ f1 = GzipFile(mode="wb", fileobj=buf)
+ # The format comes from the original `i`, not the copy —
+ # presumably the copy does not carry it; confirm against Pillow.
t1.save(f1, format=i.format)
+ # Close the gzip stream before rewinding so the data in `buf` is complete.
f1.close()
buf.seek(0)
self.fs.put(
- buf, url=url, size=size, content_type=i.get_format_mimetype()
+ buf,
+ url=url,
+ size=size,
+ content_type=i.get_format_mimetype(),
+ kind=Kind.ACTOR_ICON.value,
)
- def get_file(self, url: str, size: int) -> Any:
- return self.fs.find_one({"url": url, "size": size})
+    def cache(self, url: str, kind: Kind) -> None:
+        """Cache `url` with the routine that matches `kind`.
+
+        `Kind.ACTOR_ICON` is handled by `cache_actor_icon`; every other kind
+        is handled by `cache_attachment`.
+        """
+        if kind != Kind.ACTOR_ICON:
+            self.cache_attachment(url)
+            return
+        self.cache_actor_icon(url)
+
+    def get_actor_icon(self, url: str, size: int) -> Any:
+        """Look up the cached actor icon stored for `url` at `size`.
+
+        Returns whatever `get_file` returns for `Kind.ACTOR_ICON`.
+        """
+        # The lookup method on this class is `get_file`; `_get_file` does not
+        # exist and would raise AttributeError.
+        return self.get_file(url, size, Kind.ACTOR_ICON)
+
+    def get_attachment(self, url: str, size: int) -> Any:
+        """Look up the cached attachment stored for `url` at `size`.
+
+        Returns whatever `get_file` returns for `Kind.ATTACHMENT`.
+        """
+        # The lookup method on this class is `get_file`; `_get_file` does not
+        # exist and would raise AttributeError.
+        return self.get_file(url, size, Kind.ATTACHMENT)
+
+ def get_file(self, url: str, size: int, kind: Kind) -> Any:
+ """Return the result of `fs.find_one` for the exact `url`, `size` and `kind`.
+
+ The query matches on all three fields; `kind` is compared by its
+ string value.
+ """
+ # NOTE(review): `size` is annotated `int`, but `cache_attachment` stores
+ # originals with `size=None`, so callers may need to pass None here.
+ return self.fs.find_one({"url": url, "size": size, "kind": kind.value})