Dedup inline attachments (when already inlined)

This commit is contained in:
Thomas Sileo 2019-11-03 09:59:04 +01:00
parent 08cdcd6b50
commit be0f5c04b2
2 changed files with 25 additions and 1 deletions

View file

@ -245,7 +245,7 @@
<h3 class="l">Attachments</h3>
<ul style="padding:0;list-style-type: none;">
{% endif %}
{% for a in obj.attachment %}
{% for a in (obj | iter_note_attachments) %}
{% if (a.mediaType and a.mediaType.startswith("image/")) or (a.type and a.type == 'Image') %}
<a href="{{ a.url | get_attachment_url(None) }}">
<img src="{{a.url | get_attachment_url(720) }}" title="{{ a.name }}" alt="{{ a.name }}" class="img-attachment"></a>

View file

@ -2,6 +2,7 @@ import logging
import urllib
from datetime import datetime
from datetime import timezone
from functools import lru_cache
from urllib.parse import urlparse
import bleach
@ -345,6 +346,7 @@ def get_attachment_url(url, size):
@filters.app_template_filter()
@lru_cache(maxsize=256)
def update_inline_imgs(content):
soup = BeautifulSoup(content, "html5lib")
imgs = soup.find_all("img")
@ -418,6 +420,28 @@ def has_actor_type(doc):
return False
@lru_cache(maxsize=256)
def _get_inlined_imgs(content):
imgs = []
if not content:
return imgs
soup = BeautifulSoup(content, "html5lib")
for img in soup.find_all("img"):
src = img.attrs.get("src")
if src:
imgs.append(src)
return imgs
@filters.app_template_filter()
def iter_note_attachments(note):
    """Return the attachments of `note` that are not already inlined in its content.

    Registered as a template filter, so templates can iterate over
    `note | iter_note_attachments` instead of the raw `attachment` value.
    An attachment is dropped when its `url` equals the `src` of an `<img>`
    found in the note's `content`.

    Args:
        note: Mapping with optional `attachment` and `content` keys.

    Returns:
        A new list with the attachments whose `url` is not an inlined image.
    """
    # `or []` also covers an explicit `"attachment": None`, which the
    # `.get(key, [])` default would let through and crash the iteration.
    attachments = note.get("attachment") or []
    if isinstance(attachments, dict):
        # A lone attachment object: wrap it so it is iterated as one item
        # rather than key by key.
        attachments = [attachments]

    inlined = _get_inlined_imgs(note.get("content"))
    return [a for a in attachments if a.get("url") not in inlined]
@filters.app_template_filter()
def not_only_imgs(attachment):
for a in attachment: