Add support for inline images

This commit is contained in:
Thomas Sileo 2019-11-01 16:05:48 +01:00
parent 00b43fa935
commit 6a40e7d7f2
3 changed files with 40 additions and 4 deletions

View file

@ -7,6 +7,7 @@ from typing import Dict
import flask import flask
import requests import requests
from bs4 import BeautifulSoup
from flask import current_app as app from flask import current_app as app
from little_boxes import activitypub as ap from little_boxes import activitypub as ap
from little_boxes.activitypub import _to_list from little_boxes.activitypub import _to_list
@ -271,7 +272,9 @@ def select_video_to_cache(links):
return videos[0] return videos[0]
@blueprint.route("/task/cache_attachments", methods=["POST"]) @blueprint.route(
"/task/cache_attachments", methods=["POST"]
) # noqa: C901 # too complex
def task_cache_attachments() -> _Response: def task_cache_attachments() -> _Response:
task = p.parse(flask.request) task = p.parse(flask.request)
app.logger.info(f"task={task!r}") app.logger.info(f"task={task!r}")
@ -286,6 +289,13 @@ def task_cache_attachments() -> _Response:
else: else:
obj = activity obj = activity
if obj.content:
content_html = BeautifulSoup(obj.content, "html5lib")
for img in content_html.find_all("img"):
src = img.attrs.get("src")
if src:
Tasks.cache_attachment({"url": src}, iri)
if obj.has_type(ap.ActivityType.VIDEO): if obj.has_type(ap.ActivityType.VIDEO):
if isinstance(obj.url, list): if isinstance(obj.url, list):
# TODO: filter only videogt # TODO: filter only videogt

View file

@ -137,7 +137,7 @@
{% if obj | has_type(['Article', 'Page']) %} {% if obj | has_type(['Article', 'Page']) %}
{{ obj.name }} <a href="{{ obj | url_or_id | get_url }}">{{ obj | url_or_id | get_url }}</a> {{ obj.name }} <a href="{{ obj | url_or_id | get_url }}">{{ obj | url_or_id | get_url }}</a>
{% elif obj | has_type('Question') %} {% elif obj | has_type('Question') %}
{{ obj.content | clean | replace_custom_emojis(obj) | code_highlight | safe }} {{ obj.content | clean | update_inline_imgs | replace_custom_emojis(obj) | code_highlight | safe }}
<ul style="list-style:none;padding:0;"> <ul style="list-style:none;padding:0;">
@ -206,7 +206,7 @@
{% else %} {% else %}
{{ obj.content | clean | replace_custom_emojis(obj) | code_highlight | safe }} {{ obj.content | clean | update_inline_imgs | replace_custom_emojis(obj) | code_highlight | safe }}
{% endif %} {% endif %}
{% if obj | has_place %} {% if obj | has_place %}

View file

@ -9,6 +9,7 @@ import emoji_unicode
import flask import flask
import html2text import html2text
import timeago import timeago
from bs4 import BeautifulSoup
from cachetools import LRUCache from cachetools import LRUCache
from little_boxes import activitypub as ap from little_boxes import activitypub as ap
from little_boxes.activitypub import _to_list from little_boxes.activitypub import _to_list
@ -104,8 +105,16 @@ ALLOWED_TAGS = [
"tfoot", "tfoot",
"colgroup", "colgroup",
"caption", "caption",
"img",
] ]
# Per-tag attribute whitelist handed to ``bleach.clean`` (as ``attributes=``)
# by ``clean_html``. Attributes not listed here for a tag are not whitelisted.
# The "img" entry accompanies the "img" tag in ALLOWED_TAGS so inline images
# keep their source and alt/title text.
ALLOWED_ATTRIBUTES = {
    "a": ["href", "title"],
    "abbr": ["title"],
    "acronym": ["title"],
    "img": ["src", "alt", "title"],
}
@filters.app_template_filter() @filters.app_template_filter()
def replace_custom_emojis(content, note): def replace_custom_emojis(content, note):
@ -126,7 +135,9 @@ def replace_custom_emojis(content, note):
def clean_html(html): def clean_html(html):
try: try:
return bleach.clean(html, tags=ALLOWED_TAGS, strip=True) return bleach.clean(
html, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES, strip=True
)
except Exception: except Exception:
return "failed to clean HTML" return "failed to clean HTML"
@ -333,6 +344,21 @@ def get_attachment_url(url, size):
return _get_file_url(url, size, Kind.ATTACHMENT) return _get_file_url(url, size, Kind.ATTACHMENT)
@filters.app_template_filter()
def update_inline_imgs(content):
    """Point the ``src`` of every inline ``<img>`` in *content* at ``_get_file_url``.

    Template filter applied to an HTML fragment (already sanitised by the
    ``clean`` filter in the templates that use it).

    :param content: HTML fragment as a string.
    :returns: *content* unchanged when it is empty or holds no ``<img>`` tag;
        otherwise the re-serialised fragment in which each ``<img>`` that has
        a non-empty ``src`` now carries the value returned by
        ``_get_file_url(src, 720, Kind.ATTACHMENT)``.
    """
    if not content:
        # Nothing to parse; hand back whatever was passed in.
        return content

    # Name the parser explicitly. Without it bs4 picks whichever parser is
    # installed (and warns); "html.parser" does not synthesise a <body>, which
    # would make the ``find("body")`` below return None and crash. html5lib
    # always builds a full document, and it is the parser already used when
    # the same content is scanned for images to cache.
    soup = BeautifulSoup(content, "html5lib")
    imgs = soup.find_all("img")
    if not imgs:
        # Return the original string so image-free content is not re-serialised.
        return content

    for img in imgs:
        src = img.attrs.get("src")
        if not src:
            continue
        # 720 is passed in the size position of _get_file_url.
        img.attrs["src"] = _get_file_url(src, 720, Kind.ATTACHMENT)

    # html5lib wrapped the fragment in <html><body>…</body></html>; return only
    # the inner markup so the output is still a fragment.
    return soup.find("body").decode_contents()
@filters.app_template_filter() @filters.app_template_filter()
def get_video_url(url): def get_video_url(url):
if isinstance(url, list): if isinstance(url, list):