microblog.pub/utils/media.py

245 lines
7.9 KiB
Python
Raw Normal View History

2018-07-04 18:02:51 -05:00
import base64
import logging
import mimetypes
from enum import Enum
from enum import unique
from functools import lru_cache
from gzip import GzipFile
from io import BytesIO
from shutil import copyfileobj
from typing import Any
from typing import Dict
from typing import Optional
from typing import Tuple

import gridfs
import piexif
import requests
from little_boxes import activitypub as ap
from PIL import Image
2019-08-15 09:08:52 -05:00
@lru_cache(2048)
def _is_img(filename):
mimetype, _ = mimetypes.guess_type(filename.lower())
if mimetype and mimetype.split("/")[0] in ["image"]:
return True
return False
@lru_cache(2048)
def is_video(filename):
    """Tell whether `filename` (a path or URL) maps to a `video/*` MIME type.

    Only the name is inspected, via `mimetypes.guess_type` on its lower-cased
    form. Results are memoized.
    """
    guessed, _encoding = mimetypes.guess_type(filename.lower())
    return bool(guessed) and guessed.split("/", 1)[0] == "video"
2019-08-20 15:16:47 -05:00
def _load(url: str, user_agent: str) -> Tuple[BytesIO, Optional[str]]:
    """Download `url` fully into memory.

    Returns a `(buffer, content_type)` tuple: `buffer` is a `BytesIO` holding
    the response body, rewound to offset 0, and `content_type` is the value of
    the response's `content-type` header (`None` when the header is absent).

    `user_agent` is sent as the `User-Agent` request header. HTTP error
    statuses are surfaced through `resp.raise_for_status()`.
    """
    out = BytesIO()
    with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
        resp.raise_for_status()

        # Ask the raw stream to undo any transfer-level content encoding so
        # that the decoded body is what gets buffered.
        resp.raw.decode_content = True
        # Keep reading until the raw stream reports end-of-body (empty bytes)
        for buf in iter(resp.raw.read, b""):
            out.write(buf)
        content_type = resp.headers.get("content-type")
    out.seek(0)
    return out, content_type
def load(url: str, user_agent: str) -> Image:
    """Fetch `url` and open the downloaded bytes with `PIL.Image.open`.

    The content-type reported by the server is ignored here.
    """
    return Image.open(_load(url, user_agent)[0])
2018-07-04 18:02:51 -05:00
2019-08-05 15:40:24 -05:00
def to_data_uri(img: Image) -> str:
    """Serialize `img` as a `data:` URI.

    The image is re-encoded in its own format (`img.format`) and the bytes are
    embedded base64-encoded, prefixed with the image's format MIME type.
    """
    encoded = BytesIO()
    img.save(encoded, format=img.format)
    payload = base64.b64encode(encoded.getvalue()).decode("utf-8")
    mimetype = img.get_format_mimetype()
    return f"data:{mimetype};base64,{payload}"
2019-08-05 15:40:24 -05:00
@unique
class Kind(Enum):
    """Category tag for a cached file.

    The `.value` string is what gets written as the `kind` metadata field of a
    stored file and what lookups filter on.
    """

    # Media attached to a remote object (original and/or thumbnail)
    ATTACHMENT = "attachment"
    # Resized actor avatars
    ACTOR_ICON = "actor_icon"
    # Files uploaded locally
    UPLOAD = "upload"
    # Thumbnails of "og" images
    OG_IMAGE = "og"
    # Custom emoji files
    EMOJI = "emoji"
class MediaCache(object):
    """GridFS-backed cache for remote media and local uploads.

    Every payload is stored gzip-compressed. The keyword metadata attached to
    each stored file (`url`, `size`, `kind`, `content_type`, `remote_id`, ...)
    is what the `is_*_cached` / `get_*` helpers query on.
    """

    def __init__(self, gridfs_db: str, user_agent: str) -> None:
        """Bind the cache to a GridFS store.

        `gridfs_db` is handed unchanged to `gridfs.GridFS`.
        NOTE(review): it is annotated as `str`, but `GridFS` presumably expects
        a database handle -- confirm against the caller.
        `user_agent` is sent as the `User-Agent` header on every download.
        """
        self.fs = gridfs.GridFS(gridfs_db)
        self.user_agent = user_agent

    def _put_image(self, img: Any, img_format: Optional[str], **metadata: Any) -> None:
        """Encode `img` as `img_format`, gzip it and store it with `metadata`."""
        with BytesIO() as buf:
            # The GzipFile must be closed before the buffer is rewound so the
            # gzip trailer is flushed into `buf`.
            with GzipFile(mode="wb", fileobj=buf) as gz:
                img.save(gz, format=img_format)
            buf.seek(0)
            self.fs.put(buf, **metadata)

    def cache_og_image(self, url: str, remote_id: str) -> None:
        """Cache a thumbnail of the "og" image at `url`, unless already cached.

        Only the thumbnail (stored with `size=100`) is kept; the full-size
        image is never stored.
        """
        if self.fs.find_one({"url": url, "kind": Kind.OG_IMAGE.value}):
            return
        i = load(url, self.user_agent)
        # Shrink in place, then save the thumbnail (gzipped)
        i.thumbnail((100, 100))
        self._put_image(
            i,
            i.format,
            url=url,
            size=100,
            content_type=i.get_format_mimetype(),
            kind=Kind.OG_IMAGE.value,
            remote_id=remote_id,
        )

    def cache_attachment(self, attachment: Dict[str, Any], remote_id: str) -> None:
        """Cache the file referenced by `attachment["url"]` for `remote_id`.

        Images are stored twice: the original (`size=None`) and a thumbnail
        (`size=720`). Anything else -- or an image that could not be processed
        -- is downloaded and stored as-is with `size=None`.

        Raises `KeyError` when `attachment` has no `url`, and lets download
        errors from the raw fallback propagate.
        """
        url = attachment["url"]

        # Ensure it's not already there
        if self.fs.find_one(
            {"url": url, "kind": Kind.ATTACHMENT.value, "remote_id": remote_id}
        ):
            return

        # If it's an image, make some thumbnails
        looks_like_image = (
            _is_img(url)
            # `mediaType` may be present but null; treat that like "missing"
            or (attachment.get("mediaType") or "").startswith("image/")
            or ap._has_type(attachment.get("type"), ap.ActivityType.IMAGE)
        )
        if looks_like_image:
            try:
                i = load(url, self.user_agent)
                # Save the original attachment (gzipped)
                self._put_image(
                    i,
                    i.format,
                    url=url,
                    size=None,
                    content_type=i.get_format_mimetype(),
                    kind=Kind.ATTACHMENT.value,
                    remote_id=remote_id,
                )
                # Save a thumbnail (gzipped)
                i.thumbnail((720, 720))
                self._put_image(
                    i,
                    i.format,
                    url=url,
                    size=720,
                    content_type=i.get_format_mimetype(),
                    kind=Kind.ATTACHMENT.value,
                    remote_id=remote_id,
                )
                return
            except Exception:
                # Best effort: record why image handling failed, then fall
                # through and store the raw file instead.
                logging.getLogger(__name__).warning(
                    "failed to cache %s as an image, storing the raw file instead",
                    url,
                    exc_info=True,
                )

        # The attachment is not an image, download and save it anyway
        with requests.get(
            url, stream=True, headers={"User-Agent": self.user_agent}
        ) as resp:
            resp.raise_for_status()
            with BytesIO() as buf:
                with GzipFile(mode="wb", fileobj=buf) as f1:
                    for chunk in resp.iter_content(chunk_size=2 << 20):
                        if chunk:
                            f1.write(chunk)
                buf.seek(0)
                self.fs.put(
                    buf,
                    url=url,
                    size=None,
                    content_type=mimetypes.guess_type(url)[0],
                    kind=Kind.ATTACHMENT.value,
                    remote_id=remote_id,
                )

    def is_actor_icon_cached(self, url: str) -> bool:
        """Return True when at least one actor icon is stored for `url`."""
        return bool(self.fs.find_one({"url": url, "kind": Kind.ACTOR_ICON.value}))

    def cache_actor_icon(self, url: str) -> None:
        """Cache 50px and 80px thumbnails of the actor icon at `url`.

        Does nothing when an icon for `url` is already cached.
        """
        if self.is_actor_icon_cached(url):
            return
        i = load(url, self.user_agent)
        for size in [50, 80]:
            # Work on a copy so each size is derived from the full image
            t1 = i.copy()
            t1.thumbnail((size, size))
            # Format and MIME type are read from the source image `i`
            self._put_image(
                t1,
                i.format,
                url=url,
                size=size,
                content_type=i.get_format_mimetype(),
                kind=Kind.ACTOR_ICON.value,
            )

    def is_emoji_cached(self, url: str) -> bool:
        """Return True when an emoji is stored for `url`."""
        return bool(self.fs.find_one({"url": url, "kind": Kind.EMOJI.value}))

    def cache_emoji(self, url: str, iri: str) -> None:
        """Cache the emoji file at `url` as-is, tagged with `remote_id=iri`.

        The content type comes from the response header, falling back to a
        guess based on the URL. Does nothing when already cached.
        """
        if self.is_emoji_cached(url):
            return
        src, content_type = _load(url, self.user_agent)
        with BytesIO() as buf:
            with GzipFile(mode="wb", fileobj=buf) as g:
                copyfileobj(src, g)
            buf.seek(0)
            self.fs.put(
                buf,
                url=url,
                remote_id=iri,
                size=None,
                content_type=content_type or mimetypes.guess_type(url)[0],
                kind=Kind.EMOJI.value,
            )

    def save_upload(self, obuf: BytesIO, filename: str) -> str:
        """Store an uploaded file and return its GridFS id as a string.

        For `.jpg`/`.jpeg` filenames the EXIF metadata is stripped first, and
        `obuf` is rewritten in place with the stripped bytes. `obuf` is left
        rewound to offset 0.
        """
        # Remove EXIF metadata
        if filename.lower().endswith((".jpg", ".jpeg")):
            obuf.seek(0)
            with BytesIO() as buf2:
                piexif.remove(obuf.getvalue(), buf2)
                obuf.truncate(0)
                obuf.write(buf2.getvalue())

        obuf.seek(0)
        mtype = mimetypes.guess_type(filename)[0]
        with BytesIO() as gbuf:
            with GzipFile(mode="wb", fileobj=gbuf) as gzipfile:
                gzipfile.write(obuf.getvalue())

            gbuf.seek(0)
            oid = self.fs.put(
                gbuf,
                content_type=mtype,
                upload_filename=filename,
                kind=Kind.UPLOAD.value,
            )
            return str(oid)

    def get_actor_icon(self, url: str, size: int) -> Any:
        """Look up the cached actor icon for `url` at `size` (None if absent)."""
        return self.get_file(url, size, Kind.ACTOR_ICON)

    def get_attachment(self, url: str, size: Optional[int]) -> Any:
        """Look up a cached attachment for `url`.

        `size=None` selects the entry stored without a size (the original);
        `size=720` selects the thumbnail.
        """
        return self.get_file(url, size, Kind.ATTACHMENT)

    def get_file(self, url: str, size: Optional[int], kind: Kind) -> Any:
        """Return the stored file matching `url`, `size` and `kind`, if any."""
        return self.fs.find_one({"url": url, "size": size, "kind": kind.value})