From 181328d518c188ce6804a2792971d7238af4f2ef Mon Sep 17 00:00:00 2001
From: Thomas Sileo <t@a4.io>
Date: Tue, 20 Aug 2019 22:16:47 +0200
Subject: [PATCH] Custom emojis support

---
 app.py                    | 14 ++++++++++--
 blueprints/admin.py       |  2 ++
 blueprints/api.py         |  6 +++++
 blueprints/tasks.py       | 17 ++++++++++++++
 config.py                 |  9 +++++++-
 core/activitypub.py       |  1 +
 core/inbox.py             |  8 ++++---
 core/jsonld.py            |  1 +
 core/tasks.py             | 17 +++++++++++++-
 static/emojis/.gitignore  |  2 ++
 templates/layout.html     |  5 +++++
 templates/new.html        |  3 +++
 templates/utils.html      | 13 ++++++-----
 utils/emojis.py           | 47 +++++++++++++++++++++++++++++++++++++++
 utils/media.py            | 41 ++++++++++++++++++++++++++++++++--
 utils/template_filters.py | 24 ++++++++++++++++++--
 16 files changed, 193 insertions(+), 17 deletions(-)
 create mode 100644 static/emojis/.gitignore
 create mode 100644 utils/emojis.py
diff --git a/app.py b/app.py
index 38924f9..a41bcf2 100644
--- a/app.py
+++ b/app.py
@@ -70,6 +70,7 @@ from core.shared import login_required
 from core.shared import noindex
 from core.shared import paginated_query
 from utils.blacklist import is_blacklisted
+from utils.emojis import EMOJIS
 from utils.key import get_secret_key
 from utils.template_filters import filters
 
@@ -214,7 +215,9 @@ def _log_sig():
             req_verified, actor_id = verify_request(
                 request.method, request.path, request.headers, None
             )
             app.logger.info(f"authenticated fetch: {req_verified}: {actor_id}")
+            # Headers can carry credentials (cookies, signatures): debug level only
+            app.logger.debug(f"authenticated fetch headers: {request.headers}")
         except Exception:
             app.logger.exception("failed to verify authenticated fetch")
 
@@ -235,7 +238,7 @@ def robots_txt():
     return Response(response=ROBOTS_TXT, headers={"Content-Type": "text/plain"})
 
 
-@app.route("/microblogpub-0.0.jsonld")
+@app.route("/microblogpub-0.1.jsonld")
 def microblogpub_jsonld():
     """Returns our AP context (embedded in activities @context)."""
     return Response(
@@ -497,6 +500,14 @@ def outbox():
     return Response(status=201, headers={"Location": activity_id})
 
 
+@app.route("/emoji/<name>")
+def ap_emoji(name):
+    """Serve the ActivityPub document of the local custom emoji `name`, or 404."""
+    if name not in EMOJIS:
+        abort(404)
+    return jsonify(**{**EMOJIS[name].to_dict(), "@context": config.DEFAULT_CTX})
+
+
 @app.route("/outbox/<item_id>")
 def outbox_detail(item_id):
     doc = DB.activities.find_one(
diff --git a/blueprints/admin.py b/blueprints/admin.py
index 0f2fa46..ec87b6e 100644
--- a/blueprints/admin.py
+++ b/blueprints/admin.py
@@ -37,6 +37,7 @@ from core.shared import noindex
 from core.shared import p
 from core.shared import paginated_query
 from utils import now
+from utils.emojis import EMOJIS_BY_NAME
 from utils.lookup import lookup
 
 blueprint = flask.Blueprint("admin", __name__)
@@ -252,6 +253,7 @@ def admin_new() -> _Response:
         thread=thread,
         visibility=ap.Visibility,
         emojis=config.EMOJIS.split(" "),
+        custom_emojis=EMOJIS_BY_NAME,
     )
 
 
diff --git a/blueprints/api.py b/blueprints/api.py
index 45cc598..51a5f39 100644
--- a/blueprints/api.py
+++ b/blueprints/api.py
@@ -43,6 +43,7 @@ from core.shared import _Response
 from core.shared import csrf
 from core.shared import login_required
 from core.tasks import Tasks
+from utils import emojis
 from utils import now
 
 blueprint = flask.Blueprint("api", __name__)
@@ -398,6 +399,9 @@ def api_new_note() -> _Response:
 
     content, tags = parse_markdown(source)
 
+    # Check for custom emojis
+    tags = tags + emojis.tags(content)
+
     to: List[str] = []
     cc: List[str] = []
 
@@ -467,6 +471,8 @@ def api_new_question() -> _Response:
         raise ValueError("missing content")
 
     content, tags = parse_markdown(source)
+    tags = tags + emojis.tags(content)
+
     cc = [ID + "/followers"]
 
     for tag in tags:
diff --git a/blueprints/tasks.py b/blueprints/tasks.py
index c493da2..880bf0e 100644
--- a/blueprints/tasks.py
+++ b/blueprints/tasks.py
@@ -147,6 +147,7 @@ def task_cache_object() -> _Response:
         activity = ap.fetch_remote_activity(iri)
         app.logger.info(f"activity={activity!r}")
         obj = activity.get_object()
+        Tasks.cache_emojis(obj)
 
         # Refetch the object actor (without cache)
         obj_actor = ap.fetch_remote_activity(obj.get_actor().id, no_cache=True)
@@ -367,6 +368,22 @@ def task_cache_actor_icon() -> _Response:
     return ""
 
 
+@blueprint.route("/task/cache_emoji", methods=["POST"])
+def task_cache_emoji() -> _Response:
+    """Task handler: cache the emoji image described by the task payload."""
+    task = p.parse(flask.request)
+    app.logger.info(f"task={task!r}")
+    iri = task.payload["iri"]
+    url = task.payload["url"]
+    try:
+        MEDIA_CACHE.cache_emoji(url, iri)
+    except Exception as exc:
+        app.logger.exception(f"failed to cache emoji {url} at {iri}")
+        raise TaskError() from exc
+
+    return ""
+
+
 @blueprint.route("/task/forward_activity", methods=["POST"])
 def task_forward_activity() -> _Response:
     task = p.parse(flask.request)
diff --git a/config.py b/config.py
index 85ee003..71523eb 100644
--- a/config.py
+++ b/config.py
@@ -3,6 +3,7 @@ import os
 import subprocess
 from datetime import datetime
 from enum import Enum
+from pathlib import Path
 
 import yaml
 from itsdangerous import JSONWebSignatureSerializer
@@ -11,11 +12,14 @@ from little_boxes.activitypub import DEFAULT_CTX as AP_DEFAULT_CTX
 from pymongo import MongoClient
 
 import sass
+from utils.emojis import _load_emojis
 from utils.key import KEY_DIR
 from utils.key import get_key
 from utils.key import get_secret_key
 from utils.media import MediaCache
 
+ROOT_DIR = Path(__file__).parent.absolute()
+
 
 class ThemeStyle(Enum):
     LIGHT = "light"
@@ -75,7 +79,7 @@ with open(os.path.join(KEY_DIR, "me.yml")) as f:
 
 DEFAULT_CTX = [
     AP_DEFAULT_CTX,
-    f"{BASE_URL}/microblogpub-0.0.jsonld",
+    f"{BASE_URL}/microblogpub-0.1.jsonld",
     {"@language": "und"},
 ]
 
@@ -164,3 +168,6 @@ BLACKLIST = conf.get("blacklist", [])
 
 # By default, we keep 14 of inbox data ; outbox is kept forever (along with bookmarked stuff, outbox replies, liked...)
 DAYS_TO_KEEP = 14
+
+# Load custom emojis (stored in static/emojis)
+_load_emojis(ROOT_DIR, BASE_URL)
diff --git a/core/activitypub.py b/core/activitypub.py
index a075136..93a25f6 100644
--- a/core/activitypub.py
+++ b/core/activitypub.py
@@ -628,6 +628,7 @@ def update_cached_actor(actor: ap.BaseActivity) -> None:
     #     {"meta.object_id": actor.id}, {"$set": {"meta.object": actor.to_dict(embed=True)}}
     # )
     _cache_actor_icon(actor)
+    Tasks.cache_emojis(actor)
 
 
 def handle_question_reply(create: ap.Create, question: ap.Question) -> None:
diff --git a/core/inbox.py b/core/inbox.py
index 751417f..df5740b 100644
--- a/core/inbox.py
+++ b/core/inbox.py
@@ -110,9 +110,11 @@ def _create_process_inbox(create: ap.Create, new_meta: _NewMeta) -> None:
     _logger.info(f"process_inbox activity={create!r}")
     # If it's a `Quesiion`, trigger an async task for updating it later (by fetching the remote and updating the
     # local copy)
-    question = create.get_object()
-    if question.has_type(ap.ActivityType.QUESTION):
-        Tasks.fetch_remote_question(question)
+    obj = create.get_object()
+    if obj.has_type(ap.ActivityType.QUESTION):
+        Tasks.fetch_remote_question(obj)
+
+    Tasks.cache_emojis(obj)
 
     handle_replies(create)
 
diff --git a/core/jsonld.py b/core/jsonld.py
index 3fe106f..d9219ed 100644
--- a/core/jsonld.py
+++ b/core/jsonld.py
@@ -12,6 +12,7 @@ MICROBLOGPUB = {
             "toot": "http://joinmastodon.org/ns#",
             "totalItems": "as:totalItems",
             "value": "schema:value",
+            "Emoji": "toot:Emoji",
         },
     ]
 }
diff --git a/core/tasks.py b/core/tasks.py
index 5c0e304..036c58d 100644
--- a/core/tasks.py
+++ b/core/tasks.py
@@ -4,6 +4,7 @@ from datetime import timezone
 from typing import Any
 from typing import Dict
 
+from little_boxes import activitypub as ap
 from poussetaches import PousseTaches
 
 from config import MEDIA_CACHE
@@ -32,7 +33,26 @@ class Tasks:
         if MEDIA_CACHE.is_actor_icon_cached(icon_url):
             return None
 
         p.push({"icon_url": icon_url, "actor_iri": actor_iri}, "/task/cache_actor_icon")
+
+    @staticmethod
+    def cache_emoji(url: str, iri: str) -> None:
+        """Queue a task caching the emoji image at `url` unless already cached."""
+        # The media cache looks emojis up by image URL, not by emoji IRI
+        if MEDIA_CACHE.is_emoji_cached(url):
+            return None
+
+        p.push({"url": url, "iri": iri}, "/task/cache_emoji")
+
+    @staticmethod
+    def cache_emojis(activity: ap.BaseActivity) -> None:
+        """Queue a caching task for each emoji returned by `activity.get_emojis()`."""
+        for emoji in activity.get_emojis():
+            try:
+                Tasks.cache_emoji(emoji.get_icon_url(), emoji.id)
+            except KeyError:
+                # TODO(tsileo): log invalid emoji
+                pass
 
     @staticmethod
     def post_to_remote_inbox(payload: str, recp: str) -> None:
diff --git a/static/emojis/.gitignore b/static/emojis/.gitignore
new file mode 100644
index 0000000..d6b7ef3
--- /dev/null
+++ b/static/emojis/.gitignore
@@ -0,0 +1,2 @@
+*
+!.gitignore
diff --git a/templates/layout.html b/templates/layout.html
index 1f2ff6a..a71eab7 100644
--- a/templates/layout.html
+++ b/templates/layout.html
@@ -15,6 +15,11 @@
 .icon { color: #555; }
 .emoji {
     width: 20px;
+    height: 20px;
+}
+.custom-emoji {
+    width: 25px;
+    height: 25px;
 }
 </style>
 {% block headers %}{% endblock %}
diff --git a/templates/new.html b/templates/new.html
index ce85c02..fb5a26b 100644
--- a/templates/new.html
+++ b/templates/new.html
@@ -32,6 +32,9 @@
 {% for emoji in emojis %}
 <span class="ji">{{ emoji | emojify | safe }}</span>
 {% endfor %}
+{% for emoji in custom_emojis.values() %}
+<span class="ji"><img src="{{emoji.get_icon_url()}}" alt="{{emoji.name}}" title="{{emoji.name}}" class="custom-emoji"></span>
+{% endfor %}
 </p>
 
 <textarea name="content" rows="10" cols="50" autofocus="autofocus" designMode="on">{{ content }}</textarea>
diff --git a/templates/utils.html b/templates/utils.html
index d6d298a..5cc293d 100644
--- a/templates/utils.html
+++ b/templates/utils.html
@@ -8,7 +8,7 @@
 <img class="actor-icon" src="{{ follower.icon.url | get_actor_icon_url(size) }}" style="width:{{ size }}px;">{% endif %}
 </span>
 <div class="actor-inline">
-<div style="font-weight:bold">{{ follower.name or follower.preferredUsername }}</div>
+<div style="font-weight:bold">{{ (follower.name or follower.preferredUsername) | clean | replace_custom_emojis(follower) | safe }}</div>
 <small class="lcolor">@{{ follower.preferredUsername }}@{{ follower | url_or_id | get_url | domain }}</small>
 </div>
 </a>
@@ -53,8 +53,9 @@
 
 <div class="note-wrapper">
     <div style="clear:both;height:20px;">
-	<a href="{{ actor | url_or_id | get_url }}" style="margin:0;text-decoration:none;margin: 0;text-decoration: none;display: block;width: 75%;overflow: hidden;white-space: nowrap;text-overflow: ellipsis;float: left;" class="no-hover"><strong>{{ actor.name or actor.preferredUsername }}</strong>
-        <span class="l">@{% if not no_color and obj.id | is_from_outbox %}<span class="pcolor">{{ actor.preferredUsername }}</span>{% else %}{{ actor.preferredUsername }}{% endif %}@{% if not no_color and obj.id | is_from_outbox %}<span class="pcolor">{{ actor | url_or_id | get_url | domain }}</span>{% else %}{{ actor | url_or_id | get_url | domain }}{% endif %}</span></a>
+	<a href="{{ actor | url_or_id | get_url }}" style="margin:0;text-decoration:none;margin: 0;text-decoration: none;display: block;width: 75%;overflow: hidden;white-space: nowrap;text-overflow: ellipsis;float: left;" class="no-hover">
+        <strong>{{ (actor.name or actor.preferredUsername) | clean | replace_custom_emojis(actor) | safe }}</strong>
+        <span class="l">@{% if not no_color and obj.id | is_from_outbox %}<span class="pcolor">{{ actor.preferredUsername | clean | replace_custom_emojis(actor) | safe }}</span>{% else %}{{ actor.preferredUsername | clean | replace_custom_emojis(actor) | safe }}{% endif %}@{% if not no_color and obj.id | is_from_outbox %}<span class="pcolor">{{ actor | url_or_id | get_url | domain }}</span>{% else %}{{ actor | url_or_id | get_url | domain }}{% endif %}</span></a>
 
 	{% if not perma %}
 	<span style="float:right;width: 25%;text-align: right;overflow: hidden;white-space: nowrap;text-overflow: ellipsis;display: block;">
@@ -64,7 +65,7 @@
 	{% endif %}
     </div>
 
-	{% if obj.summary %}<p class="p-summary">{{ obj.summary | clean | safe }}</p>{% endif %}
+	{% if obj.summary %}<p class="p-summary">{{ obj.summary | clean | replace_custom_emojis(obj) | safe }}</p>{% endif %}
     {% if obj | has_type('Video') %}
     <div class="note-video">
     <video controls preload="metadata"  src="{{ obj.url | get_video_url }}" width="480">
@@ -76,7 +77,7 @@
     {% if obj | has_type(['Article', 'Page']) %}
         {{ obj.name }} <a href="{{ obj | url_or_id | get_url }}">{{ obj | url_or_id | get_url }}</a>
     {% elif obj | has_type('Question') %}
-        {{ obj.content | clean | safe }}
+        {{ obj.content | clean | replace_custom_emojis(obj) | safe }}
 
         
         <ul style="list-style:none;padding:0;">
@@ -145,7 +146,7 @@
 
 
     {% else %}
-	{{ obj.content | clean | safe }}
+	{{ obj.content | clean | replace_custom_emojis(obj) | safe }}
     {% endif %}
 	</div>
 
diff --git a/utils/emojis.py b/utils/emojis.py
new file mode 100644
index 0000000..f0c9539
--- /dev/null
+++ b/utils/emojis.py
@@ -0,0 +1,64 @@
+import mimetypes
+import re
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+from typing import Dict
+from typing import List
+from typing import Set
+
+from little_boxes import activitypub as ap
+
+# A custom emoji shortcode: word characters wrapped in colons (e.g. `:blob:`)
+EMOJI_REGEX = re.compile(r"(:[\d\w]+:)")
+
+# Local custom emojis indexed by bare name (the last path segment of their id)
+EMOJIS: Dict[str, ap.Emoji] = {}
+# The same emojis indexed by shortcode (`:name:`)
+EMOJIS_BY_NAME: Dict[str, ap.Emoji] = {}
+
+
+def _load_emojis(root_dir: Path, base_url: str) -> None:
+    """Fill `EMOJIS`/`EMOJIS_BY_NAME` from the images in `static/emojis`.
+
+    Does nothing if emojis are already loaded or if the directory is missing.
+    """
+    if EMOJIS:
+        return
+    emojis_dir = root_dir / "static" / "emojis"
+    if not emojis_dir.is_dir():
+        return
+    for emoji in emojis_dir.iterdir():
+        mt = mimetypes.guess_type(emoji.name)[0]
+        if not (mt and mt.startswith("image/")):
+            continue
+        name = emoji.name.split(".")[0]
+        shortcode = f":{name}:"
+        # Skip names `tags` could never match (e.g. containing a dash)
+        if not EMOJI_REGEX.fullmatch(shortcode):
+            continue
+        ap_emoji = ap.Emoji(
+            name=shortcode,
+            updated=ap.format_datetime(datetime.fromtimestamp(0.0).astimezone()),
+            id=f"{base_url}/emoji/{name}",
+            icon={
+                "mediaType": mt,
+                "type": ap.ActivityType.IMAGE.value,
+                "url": f"{base_url}/static/emojis/{emoji.name}",
+            },
+        )
+        # Key on the bare name so `/emoji/<name>` resolves the advertised id
+        EMOJIS[name] = ap_emoji
+        EMOJIS_BY_NAME[shortcode] = ap_emoji
+
+
+def tags(content: str) -> List[Dict[str, Any]]:
+    """Return one `Emoji` tag dict per distinct known shortcode in `content`."""
+    found: List[Dict[str, Any]] = []
+    added: Set[str] = set()
+    for e in EMOJI_REGEX.findall(content):
+        if e not in added and e in EMOJIS_BY_NAME:
+            found.append(EMOJIS_BY_NAME[e].to_dict())
+            added.add(e)
+
+    return found
diff --git a/utils/media.py b/utils/media.py
index 96567de..9b0cb81 100644
--- a/utils/media.py
+++ b/utils/media.py
@@ -5,8 +5,11 @@ from enum import unique
 from functools import lru_cache
 from gzip import GzipFile
 from io import BytesIO
+from shutil import copyfileobj
 from typing import Any
 from typing import Dict
+from typing import Optional
+from typing import Tuple
 
 import gridfs
 import piexif
@@ -31,13 +34,22 @@ def is_video(filename):
     return False
 
 
-def load(url: str, user_agent: str) -> Image:
-    """Initializes a `PIL.Image` from the URL."""
+def _load(url: str, user_agent: str) -> Tuple[BytesIO, Optional[str]]:
+    """Download `url` and return its body (rewound) with its Content-Type header."""
     with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
         resp.raise_for_status()
 
         resp.raw.decode_content = True
-        return Image.open(BytesIO(resp.raw.read()))
+        # `read()` without a size reads up to EOF, so a single call is enough
+        out = BytesIO(resp.raw.read())
+        content_type = resp.headers.get("content-type")
+    return out, content_type
+
+
+def load(url: str, user_agent: str) -> Image:
+    """Initializes a `PIL.Image` from the URL."""
+    out, _ = _load(url, user_agent)
+    return Image.open(out)
 
 
 def to_data_uri(img: Image) -> str:
@@ -54,6 +66,7 @@ class Kind(Enum):
     ACTOR_ICON = "actor_icon"
     UPLOAD = "upload"
     OG_IMAGE = "og"
+    EMOJI = "emoji"
 
 
 class MediaCache(object):
@@ -173,6 +186,30 @@ class MediaCache(object):
                     kind=Kind.ACTOR_ICON.value,
                 )
 
+    def is_emoji_cached(self, url: str) -> bool:
+        """Return True if the emoji image at `url` is already stored."""
+        return bool(self.fs.find_one({"url": url, "kind": Kind.EMOJI.value}))
+
+    def cache_emoji(self, url: str, iri: str) -> None:
+        """Fetch the emoji image at `url` and store it gzipped, tagged with `iri`."""
+        if self.is_emoji_cached(url):
+            return
+        src, content_type = _load(url, self.user_agent)
+        with BytesIO() as buf:
+            with GzipFile(mode="wb", fileobj=buf) as g:
+                copyfileobj(src, g)
+            # Only rewind and store once the gzip stream has been closed,
+            # otherwise the stored file is missing its final block and trailer
+            buf.seek(0)
+            self.fs.put(
+                buf,
+                url=url,
+                remote_id=iri,
+                size=None,
+                content_type=content_type or mimetypes.guess_type(url)[0],
+                kind=Kind.EMOJI.value,
+            )
+
     def save_upload(self, obuf: BytesIO, filename: str) -> str:
         # Remove EXIF metadata
         if filename.lower().endswith(".jpg") or filename.lower().endswith(".jpeg"):
diff --git a/utils/template_filters.py b/utils/template_filters.py
index e565b15..93f3b88 100644
--- a/utils/template_filters.py
+++ b/utils/template_filters.py
@@ -91,6 +91,44 @@ ALLOWED_TAGS = [
 ]
 
 
+@filters.app_template_filter()
+def replace_custom_emojis(content, note):
+    """Replace the custom emoji shortcodes found in `content` with `<img>` tags.
+
+    The emojis are read from the `Emoji` entries of `note["tag"]`. Names and
+    URLs come from remote servers, so they are HTML-escaped before being
+    embedded in the markup (the result is rendered with `| safe`).
+    """
+    from html import escape
+
+    tags = note.get("tag") or []
+    if isinstance(tags, dict):
+        # A lone tag may be serialized as an object instead of a list
+        tags = [tags]
+
+    idx = {}
+    for tag in tags:
+        if not isinstance(tag, dict) or tag.get("type") != "Emoji":
+            continue
+        name = tag.get("name")
+        icon = tag.get("icon")
+        url = icon.get("url") if isinstance(icon, dict) else None
+        # Skip malformed emojis (an empty name would match between every character)
+        if not name or not isinstance(name, str) or not isinstance(url, str):
+            continue
+        idx[name] = _get_file_url(url, None, Kind.EMOJI)
+
+    for emoji_name, emoji_url in idx.items():
+        safe_name = escape(emoji_name, quote=True)
+        safe_url = escape(emoji_url, quote=True)
+        content = content.replace(
+            emoji_name,
+            f'<img class="custom-emoji" src="{safe_url}" title="{safe_name}" alt="{safe_name}">',
+        )
+
+    return content
+
+
 def clean_html(html):
     try:
         return bleach.clean(html, tags=ALLOWED_TAGS, strip=True)
@@ -237,6 +256,9 @@ _FILE_URL_CACHE = LRUCache(4096)
 
 
 def _get_file_url(url, size, kind) -> str:
+    if url.startswith(BASE_URL):
+        return url
+
     k = (url, size, kind)
     cached = _FILE_URL_CACHE.get(k)
     if cached:
@@ -249,8 +271,6 @@ def _get_file_url(url, size, kind) -> str:
         return out
 
     _logger.error(f"cache not available for {url}/{size}/{kind}")
-    if url.startswith(BASE_URL):
-        return url
     p = urlparse(url)
     return f"/p/{p.scheme}" + p._replace(scheme="").geturl()[1:]