From 181328d518c188ce6804a2792971d7238af4f2ef Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Tue, 20 Aug 2019 22:16:47 +0200 Subject: [PATCH] Custom emojis support --- app.py | 14 ++++++++++-- blueprints/admin.py | 2 ++ blueprints/api.py | 6 +++++ blueprints/tasks.py | 17 ++++++++++++++ config.py | 9 +++++++- core/activitypub.py | 1 + core/inbox.py | 8 ++++--- core/jsonld.py | 1 + core/tasks.py | 17 +++++++++++++- static/emojis/.gitignore | 2 ++ templates/layout.html | 5 +++++ templates/new.html | 3 +++ templates/utils.html | 13 ++++++----- utils/emojis.py | 47 +++++++++++++++++++++++++++++++++++++++ utils/media.py | 41 ++++++++++++++++++++++++++++++++-- utils/template_filters.py | 24 ++++++++++++++++++-- 16 files changed, 193 insertions(+), 17 deletions(-) create mode 100644 static/emojis/.gitignore create mode 100644 utils/emojis.py diff --git a/app.py b/app.py index 38924f9..a41bcf2 100644 --- a/app.py +++ b/app.py @@ -70,6 +70,7 @@ from core.shared import login_required from core.shared import noindex from core.shared import paginated_query from utils.blacklist import is_blacklisted +from utils.emojis import EMOJIS from utils.key import get_secret_key from utils.template_filters import filters @@ -214,7 +215,9 @@ def _log_sig(): req_verified, actor_id = verify_request( request.method, request.path, request.headers, None ) - app.logger.info(f"authenticated fetch: {req_verified}: {actor_id}") + app.logger.info( + f"authenticated fetch: {req_verified}: {actor_id} {request.headers}" + ) except Exception: app.logger.exception("failed to verify authenticated fetch") @@ -235,7 +238,7 @@ def robots_txt(): return Response(response=ROBOTS_TXT, headers={"Content-Type": "text/plain"}) -@app.route("/microblogpub-0.0.jsonld") +@app.route("/microblogpub-0.1.jsonld") def microblogpub_jsonld(): """Returns our AP context (embedded in activities @context).""" return Response( @@ -497,6 +500,13 @@ def outbox(): return Response(status=201, headers={"Location": activity_id}) +@app.route("/emoji/") +def ap_emoji(name): + if name in EMOJIS: + return jsonify(**{**EMOJIS[name].to_dict(), "@context": config.DEFAULT_CTX}) + abort(404) + + @app.route("/outbox/") def outbox_detail(item_id): doc = DB.activities.find_one( diff --git a/blueprints/admin.py b/blueprints/admin.py index 0f2fa46..ec87b6e 100644 --- a/blueprints/admin.py +++ b/blueprints/admin.py @@ -37,6 +37,7 @@ from core.shared import noindex from core.shared import p from core.shared import paginated_query from utils import now +from utils.emojis import EMOJIS_BY_NAME from utils.lookup import lookup blueprint = flask.Blueprint("admin", __name__) @@ -252,6 +253,7 @@ def admin_new() -> _Response: thread=thread, visibility=ap.Visibility, emojis=config.EMOJIS.split(" "), + custom_emojis=EMOJIS_BY_NAME, ) diff --git a/blueprints/api.py b/blueprints/api.py index 45cc598..51a5f39 100644 --- a/blueprints/api.py +++ b/blueprints/api.py @@ -43,6 +43,7 @@ from core.shared import _Response from core.shared import csrf from core.shared import login_required from core.tasks import Tasks +from utils import emojis from utils import now blueprint = flask.Blueprint("api", __name__) @@ -398,6 +399,9 @@ def api_new_note() -> _Response: content, tags = parse_markdown(source) + # Check for custom emojis + tags = tags + emojis.tags(content) + to: List[str] = [] cc: List[str] = [] @@ -467,6 +471,8 @@ def api_new_question() -> _Response: raise ValueError("missing content") content, tags = parse_markdown(source) + tags = tags + emojis.tags(content) + cc = [ID + "/followers"] for tag in tags: diff --git a/blueprints/tasks.py b/blueprints/tasks.py index c493da2..880bf0e 100644 --- a/blueprints/tasks.py +++ b/blueprints/tasks.py @@ -147,6 +147,7 @@ def task_cache_object() -> _Response: activity = ap.fetch_remote_activity(iri) app.logger.info(f"activity={activity!r}") obj = activity.get_object() + Tasks.cache_emojis(obj) # Refetch the object actor (without cache) obj_actor = ap.fetch_remote_activity(obj.get_actor().id, no_cache=True) @@ -367,6 +368,22 @@ def task_cache_actor_icon() -> _Response: return "" +@blueprint.route("/task/cache_emoji", methods=["POST"]) +def task_cache_emoji() -> _Response: + task = p.parse(flask.request) + app.logger.info(f"task={task!r}") + iri = task.payload["iri"] + url = task.payload["url"] + try: + MEDIA_CACHE.cache_emoji(url, iri) + except Exception as exc: + err = f"failed to cache emoji {url} at {iri}" + app.logger.exception(err) + raise TaskError() from exc + + return "" + + @blueprint.route("/task/forward_activity", methods=["POST"]) def task_forward_activity() -> _Response: task = p.parse(flask.request) diff --git a/config.py b/config.py index 85ee003..71523eb 100644 --- a/config.py +++ b/config.py @@ -3,6 +3,7 @@ import os import subprocess from datetime import datetime from enum import Enum +from pathlib import Path import yaml from itsdangerous import JSONWebSignatureSerializer @@ -11,11 +12,14 @@ from little_boxes.activitypub import DEFAULT_CTX as AP_DEFAULT_CTX from pymongo import MongoClient import sass +from utils.emojis import _load_emojis from utils.key import KEY_DIR from utils.key import get_key from utils.key import get_secret_key from utils.media import MediaCache +ROOT_DIR = Path(__file__).parent.absolute() + class ThemeStyle(Enum): LIGHT = "light" @@ -75,7 +79,7 @@ with open(os.path.join(KEY_DIR, "me.yml")) as f: DEFAULT_CTX = [ AP_DEFAULT_CTX, - f"{BASE_URL}/microblogpub-0.0.jsonld", + f"{BASE_URL}/microblogpub-0.1.jsonld", {"@language": "und"}, ] @@ -164,3 +168,6 @@ BLACKLIST = conf.get("blacklist", []) # By default, we keep 14 of inbox data ; outbox is kept forever (along with bookmarked stuff, outbox replies, liked...) DAYS_TO_KEEP = 14 + +# Load custom emojis (stored in static/emojis) +_load_emojis(ROOT_DIR, BASE_URL) diff --git a/core/activitypub.py b/core/activitypub.py index a075136..93a25f6 100644 --- a/core/activitypub.py +++ b/core/activitypub.py @@ -628,6 +628,7 @@ def update_cached_actor(actor: ap.BaseActivity) -> None: # {"meta.object_id": actor.id}, {"$set": {"meta.object": actor.to_dict(embed=True)}} # ) _cache_actor_icon(actor) + Tasks.cache_emojis(actor) def handle_question_reply(create: ap.Create, question: ap.Question) -> None: diff --git a/core/inbox.py b/core/inbox.py index 751417f..df5740b 100644 --- a/core/inbox.py +++ b/core/inbox.py @@ -110,9 +110,11 @@ def _create_process_inbox(create: ap.Create, new_meta: _NewMeta) -> None: _logger.info(f"process_inbox activity={create!r}") # If it's a `Quesiion`, trigger an async task for updating it later (by fetching the remote and updating the # local copy) - question = create.get_object() - if question.has_type(ap.ActivityType.QUESTION): - Tasks.fetch_remote_question(question) + obj = create.get_object() + if obj.has_type(ap.ActivityType.QUESTION): + Tasks.fetch_remote_question(obj) + + Tasks.cache_emojis(obj) handle_replies(create) diff --git a/core/jsonld.py b/core/jsonld.py index 3fe106f..d9219ed 100644 --- a/core/jsonld.py +++ b/core/jsonld.py @@ -12,6 +12,7 @@ MICROBLOGPUB = { "toot": "http://joinmastodon.org/ns#", "totalItems": "as:totalItems", "value": "schema:value", + "Emoji": "toot:Emoji", }, ] } diff --git a/core/tasks.py b/core/tasks.py index 5c0e304..036c58d 100644 --- a/core/tasks.py +++ b/core/tasks.py @@ -4,6 +4,7 @@ from datetime import timezone from typing import Any from typing import Dict +from little_boxes import activitypub as ap from poussetaches import PousseTaches from config import MEDIA_CACHE @@ -32,7 +33,21 @@ class Tasks: if MEDIA_CACHE.is_actor_icon_cached(icon_url): return None - p.push({"icon_url": icon_url, "actor_iri": actor_iri}, "/task/cache_actor_icon") + @staticmethod + def cache_emoji(url: str, iri: str) -> None: + if MEDIA_CACHE.is_emoji_cached(iri): + return None + + p.push({"url": url, "iri": iri}, "/task/cache_emoji") + + @staticmethod + def cache_emojis(activity: ap.BaseActivity) -> None: + for emoji in activity.get_emojis(): + try: + Tasks.cache_emoji(emoji.get_icon_url(), emoji.id) + except KeyError: + # TODO(tsileo): log invalid emoji + pass @staticmethod def post_to_remote_inbox(payload: str, recp: str) -> None: diff --git a/static/emojis/.gitignore b/static/emojis/.gitignore new file mode 100644 index 0000000..d6b7ef3 --- /dev/null +++ b/static/emojis/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/templates/layout.html b/templates/layout.html index 1f2ff6a..a71eab7 100644 --- a/templates/layout.html +++ b/templates/layout.html @@ -15,6 +15,11 @@ .icon { color: #555; } .emoji { width: 20px; + height: 20px; +} +.custom-emoji { + width: 25px; + height: 25px; } {% block headers %}{% endblock %} diff --git a/templates/new.html b/templates/new.html index ce85c02..fb5a26b 100644 --- a/templates/new.html +++ b/templates/new.html @@ -32,6 +32,9 @@ {% for emoji in emojis %} {{ emoji | emojify | safe }} {% endfor %} +{% for emoji in custom_emojis.values() %} +{{emoji.name}} +{% endfor %}

diff --git a/templates/utils.html b/templates/utils.html index d6d298a..5cc293d 100644 --- a/templates/utils.html +++ b/templates/utils.html @@ -8,7 +8,7 @@ {% endif %}
-
{{ follower.name or follower.preferredUsername }}
+
{{ (follower.name or follower.preferredUsername) | clean | replace_custom_emojis(follower) | safe }}
@{{ follower.preferredUsername }}@{{ follower | url_or_id | get_url | domain }}
@@ -53,8 +53,9 @@
- {{ actor.name or actor.preferredUsername }} - @{% if not no_color and obj.id | is_from_outbox %}{{ actor.preferredUsername }}{% else %}{{ actor.preferredUsername }}{% endif %}@{% if not no_color and obj.id | is_from_outbox %}{{ actor | url_or_id | get_url | domain }}{% else %}{{ actor | url_or_id | get_url | domain }}{% endif %} + + {{ (actor.name or actor.preferredUsername) | clean | replace_custom_emojis(actor) | safe }} + @{% if not no_color and obj.id | is_from_outbox %}{{ actor.preferredUsername | clean | replace_custom_emojis(actor) | safe }}{% else %}{{ actor.preferredUsername | clean | replace_custom_emojis(actor) | safe }}{% endif %}@{% if not no_color and obj.id | is_from_outbox %}{{ actor | url_or_id | get_url | domain }}{% else %}{{ actor | url_or_id | get_url | domain }}{% endif %} {% if not perma %} @@ -64,7 +65,7 @@ {% endif %}
- {% if obj.summary %}

{{ obj.summary | clean | safe }}

{% endif %} + {% if obj.summary %}

{{ obj.summary | clean | replace_custom_emojis(obj) | safe }}

{% endif %} {% if obj | has_type('Video') %}
diff --git a/utils/emojis.py b/utils/emojis.py new file mode 100644 index 0000000..f0c9539 --- /dev/null +++ b/utils/emojis.py @@ -0,0 +1,47 @@ +import mimetypes +import re +from datetime import datetime +from pathlib import Path +from typing import Any +from typing import Dict +from typing import List +from typing import Set + +from little_boxes import activitypub as ap + +EMOJI_REGEX = re.compile(r"(:[\d\w]+:)") + +EMOJIS: Dict[str, ap.Emoji] = {} +EMOJIS_BY_NAME: Dict[str, ap.Emoji] = {} + + +def _load_emojis(root_dir: Path, base_url: str) -> None: + if EMOJIS: + return + for emoji in (root_dir / "static" / "emojis").iterdir(): + mt = mimetypes.guess_type(emoji.name)[0] + if mt and mt.startswith("image/"): + name = emoji.name.split(".")[0] + ap_emoji = ap.Emoji( + name=f":{name}:", + updated=ap.format_datetime(datetime.fromtimestamp(0.0).astimezone()), + id=f"{base_url}/emoji/{name}", + icon={ + "mediaType": mt, + "type": ap.ActivityType.IMAGE.value, + "url": f"{base_url}/static/emojis/{emoji.name}", + }, + ) + EMOJIS[emoji.name] = ap_emoji + EMOJIS_BY_NAME[ap_emoji.name] = ap_emoji + + +def tags(content: str) -> List[Dict[str, Any]]: + tags: List[Dict[str, Any]] = [] + added: Set[str] = set() + for e in re.findall(EMOJI_REGEX, content): + if e not in added and e in EMOJIS_BY_NAME: + tags.append(EMOJIS_BY_NAME[e].to_dict()) + added.add(e) + + return tags diff --git a/utils/media.py b/utils/media.py index 96567de..9b0cb81 100644 --- a/utils/media.py +++ b/utils/media.py @@ -5,8 +5,11 @@ from enum import unique from functools import lru_cache from gzip import GzipFile from io import BytesIO +from shutil import copyfileobj from typing import Any from typing import Dict +from typing import Optional +from typing import Tuple import gridfs import piexif @@ -31,13 +34,26 @@ def is_video(filename): return False -def load(url: str, user_agent: str) -> Image: +def _load(url: str, user_agent: str) -> Tuple[BytesIO, Optional[str]]: """Initializes a `PIL.Image` from the URL.""" + out = BytesIO() with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp: resp.raise_for_status() resp.raw.decode_content = True - return Image.open(BytesIO(resp.raw.read())) + while 1: + buf = resp.raw.read() + if not buf: + break + out.write(buf) + out.seek(0) + return out, resp.headers.get("content-type") + + +def load(url: str, user_agent: str) -> Image: + """Initializes a `PIL.Image` from the URL.""" + out, _ = _load(url, user_agent) + return Image.open(out) def to_data_uri(img: Image) -> str: @@ -54,6 +70,7 @@ class Kind(Enum): ACTOR_ICON = "actor_icon" UPLOAD = "upload" OG_IMAGE = "og" + EMOJI = "emoji" class MediaCache(object): @@ -173,6 +190,26 @@ class MediaCache(object): kind=Kind.ACTOR_ICON.value, ) + def is_emoji_cached(self, url: str) -> bool: + return bool(self.fs.find_one({"url": url, "kind": Kind.EMOJI.value})) + + def cache_emoji(self, url: str, iri: str) -> None: + if self.is_emoji_cached(url): + return + src, content_type = _load(url, self.user_agent) + with BytesIO() as buf: + with GzipFile(mode="wb", fileobj=buf) as g: + copyfileobj(src, g) + buf.seek(0) + self.fs.put( + buf, + url=url, + remote_id=iri, + size=None, + content_type=content_type or mimetypes.guess_type(url)[0], + kind=Kind.EMOJI.value, + ) + def save_upload(self, obuf: BytesIO, filename: str) -> str: # Remove EXIF metadata if filename.lower().endswith(".jpg") or filename.lower().endswith(".jpeg"): diff --git a/utils/template_filters.py b/utils/template_filters.py index e565b15..93f3b88 100644 --- a/utils/template_filters.py +++ b/utils/template_filters.py @@ -91,6 +91,25 @@ ALLOWED_TAGS = [ ] +@filters.app_template_filter() +def replace_custom_emojis(content, note): + print("\n" * 50) + print("custom_replace", note) + idx = {} + for tag in note.get("tag", []): + if tag.get("type") == "Emoji": + # try: + idx[tag["name"]] = _get_file_url(tag["icon"]["url"], None, Kind.EMOJI) + + for emoji_name, emoji_url in idx.items(): + content = content.replace( + emoji_name, + f'{emoji_name}', + ) + + return content + + def clean_html(html): try: return bleach.clean(html, tags=ALLOWED_TAGS, strip=True) @@ -237,6 +256,9 @@ _FILE_URL_CACHE = LRUCache(4096) def _get_file_url(url, size, kind) -> str: + if url.startswith(BASE_URL): + return url + k = (url, size, kind) cached = _FILE_URL_CACHE.get(k) if cached: @@ -249,8 +271,6 @@ def _get_file_url(url, size, kind) -> str: return out _logger.error(f"cache not available for {url}/{size}/{kind}") - if url.startswith(BASE_URL): - return url p = urlparse(url) return f"/p/{p.scheme}" + p._replace(scheme="").geturl()[1:]