Store uploads in MongoDB too, start pagination

Thomas Sileo — 2018-07-06 23:15:49 +02:00
commit acafc1cc85 (parent 0d7a1b9b5a)
6 changed files with 138 additions and 55 deletions

app.py (112 changed lines)

@@ -1,5 +1,4 @@
 import binascii
-import hashlib
 import json
 import logging
 import mimetypes
@@ -8,15 +7,16 @@ import urllib
 from datetime import datetime
 from datetime import timezone
 from functools import wraps
+from io import BytesIO
 from typing import Any
 from typing import Dict
+from typing import Optional
 from typing import Tuple
 from urllib.parse import urlencode
 from urllib.parse import urlparse

 import bleach
 import mf2py
-import piexif
 import pymongo
 import timeago
 from bson.objectid import ObjectId
@@ -37,24 +37,22 @@ from passlib.hash import bcrypt
 from u2flib_server import u2f
 from werkzeug.utils import secure_filename

 import activitypub
 import config
 from activitypub import Box
 from activitypub import embed_collection
-from config import USER_AGENT
 from config import ADMIN_API_KEY
 from config import BASE_URL
 from config import DB
 from config import DEBUG_MODE
 from config import DOMAIN
-from config import GRIDFS
 from config import HEADERS
 from config import ICON_URL
 from config import ID
 from config import JWT
 from config import KEY
 from config import ME
+from config import MEDIA_CACHE
 from config import PASS
 from config import USERNAME
 from config import VERSION
@@ -73,13 +71,9 @@ from little_boxes.httpsig import HTTPSigAuth
 from little_boxes.httpsig import verify_request
 from little_boxes.webfinger import get_actor_url
 from little_boxes.webfinger import get_remote_follow_template
-from utils.img import ImageCache
-from utils.img import Kind
 from utils.key import get_secret_key
+from utils.media import Kind
 from utils.object_service import ObjectService
-from typing import Optional
-
-IMAGE_CACHE = ImageCache(GRIDFS, USER_AGENT)

 OBJECT_SERVICE = ACTOR_SERVICE = ObjectService()
@@ -198,13 +192,13 @@ def _get_file_url(url, size, kind):
     if cached:
         return cached

-    doc = IMAGE_CACHE.get_file(url, size, kind)
+    doc = MEDIA_CACHE.get_file(url, size, kind)
     if doc:
-        u = f"/img/{str(doc._id)}"
+        u = f"/media/{str(doc._id)}"
         _GRIDFS_CACHE[k] = u
         return u

-    IMAGE_CACHE.cache(url, kind)
+    MEDIA_CACHE.cache(url, kind)
     return _get_file_url(url, size, kind)
@@ -395,10 +389,35 @@ def handle_activitypub_error(error):
 # App routes

-@app.route("/img/<img_id>")
-def serve_img(img_id):
-    f = IMAGE_CACHE.fs.get(ObjectId(img_id))
+ROBOTS_TXT = """User-agent: *
+Disallow: /admin/
+Disallow: /static/
+Disallow: /media/
+Disallow: /uploads/"""
+
+
+@app.route("/robots.txt")
+def robots_txt():
+    return Response(response=ROBOTS_TXT, headers={"Content-Type": "text/plain"})
+
+
+@app.route("/media/<media_id>")
+def serve_media(media_id):
+    f = MEDIA_CACHE.fs.get(ObjectId(media_id))
+    resp = app.response_class(f, direct_passthrough=True, mimetype=f.content_type)
+    resp.headers.set("Content-Length", f.length)
+    resp.headers.set("ETag", f.md5)
+    resp.headers.set(
+        "Last-Modified", f.uploadDate.strftime("%a, %d %b %Y %H:%M:%S GMT")
+    )
+    resp.headers.set("Cache-Control", "public,max-age=31536000,immutable")
+    resp.headers.set("Content-Encoding", "gzip")
+    return resp
+
+
+@app.route("/uploads/<oid>/<fname>")
+def serve_uploads(oid, fname):
+    f = MEDIA_CACHE.fs.get(ObjectId(oid))
     resp = app.response_class(f, direct_passthrough=True, mimetype=f.content_type)
     resp.headers.set("Content-Length", f.length)
     resp.headers.set("ETag", f.md5)
@@ -560,12 +579,12 @@ def tmp_migrate3():
             activity = ap.parse_activity(activity["activity"])
             actor = activity.get_actor()
             if actor.icon:
-                IMAGE_CACHE.cache(actor.icon["url"], Kind.ACTOR_ICON)
+                MEDIA_CACHE.cache(actor.icon["url"], Kind.ACTOR_ICON)
             if activity.type == ActivityType.CREATE.value:
                 for attachment in activity.get_object()._data.get("attachment", []):
-                    IMAGE_CACHE.cache(attachment["url"], Kind.ATTACHMENT)
-        except:
-            app.logger.exception('failed')
+                    MEDIA_CACHE.cache(attachment["url"], Kind.ATTACHMENT)
+        except Exception:
+            app.logger.exception("failed")
     return "Done"
@@ -574,8 +593,10 @@ def index():
     if is_api_request():
         return jsonify(**ME)

-    # FIXME(tsileo): implements pagination, also for the followers/following page
-    limit = 50
+    older_than = newer_than = None
+    query_sort = -1
+    first_page = not request.args.get('older_than') and not request.args.get('newer_than')
+    limit = 5
     q = {
         "box": Box.OUTBOX.value,
         "type": {"$in": [ActivityType.CREATE.value, ActivityType.ANNOUNCE.value]},
@@ -583,16 +604,35 @@ def index():
         "meta.deleted": False,
         "meta.undo": False,
     }

-    c = request.args.get("cursor")
-    if c:
-        q["_id"] = {"$lt": ObjectId(c)}
+    query_older_than = request.args.get("older_than")
+    query_newer_than = request.args.get("newer_than")
+    if query_older_than:
+        q["_id"] = {"$lt": ObjectId(query_older_than)}
+    elif query_newer_than:
+        q["_id"] = {"$gt": ObjectId(query_newer_than)}
+        query_sort = 1

-    outbox_data = list(DB.activities.find(q, limit=limit).sort("_id", -1))
-    cursor = None
-    if outbox_data and len(outbox_data) == limit:
-        cursor = str(outbox_data[-1]["_id"])
+    outbox_data = list(DB.activities.find(q, limit=limit+1).sort("_id", query_sort))
+    outbox_len = len(outbox_data)
+    outbox_data = sorted(outbox_data[:limit], key=lambda x: str(x["_id"]), reverse=True)

-    return render_template("index.html", outbox_data=outbox_data, cursor=cursor)
+    if query_older_than:
+        newer_than = str(outbox_data[0]["_id"])
+        if outbox_len == limit + 1:
+            older_than = str(outbox_data[-1]["_id"])
+    elif query_newer_than:
+        older_than = str(outbox_data[-1]["_id"])
+        if outbox_len == limit + 1:
+            newer_than = str(outbox_data[0]["_id"])
+    elif first_page and outbox_len == limit + 1:
+        older_than = str(outbox_data[-1]["_id"])
+
+    return render_template(
+        "index.html",
+        outbox_data=outbox_data,
+        older_than=older_than,
+        newer_than=newer_than,
+    )


 @app.route("/with_replies")
@@ -1352,21 +1392,17 @@ def api_new_note():
     if "file" in request.files:
         file = request.files["file"]
         rfilename = secure_filename(file.filename)
-        prefix = hashlib.sha256(os.urandom(32)).hexdigest()[:6]
+        with BytesIO() as buf:
+            file.save(buf)
+            oid = MEDIA_CACHE.save_upload(buf, rfilename)
         mtype = mimetypes.guess_type(rfilename)[0]
-        filename = f"{prefix}_{rfilename}"
-        file.save(os.path.join("static", "media", filename))
-        # Remove EXIF metadata
-        if filename.lower().endswith(".jpg") or filename.lower().endswith(".jpeg"):
-            piexif.remove(os.path.join("static", "media", filename))

         raw_note["attachment"] = [
             {
                 "mediaType": mtype,
                 "name": rfilename,
                 "type": "Document",
-                "url": BASE_URL + f"/static/media/{filename}",
+                "url": f"{BASE_URL}/uploads/{oid}/{rfilename}",
             }
         ]

config.py

@@ -14,6 +14,7 @@ from little_boxes import strtobool
 from utils.key import KEY_DIR
 from utils.key import get_key
 from utils.key import get_secret_key
+from utils.media import MediaCache


 class ThemeStyle(Enum):
@@ -98,6 +99,7 @@ mongo_client = MongoClient(
 DB_NAME = "{}_{}".format(USERNAME, DOMAIN.replace(".", "_"))
 DB = mongo_client[DB_NAME]
 GRIDFS = mongo_client[f"{DB_NAME}_gridfs"]
+MEDIA_CACHE = MediaCache(GRIDFS, USER_AGENT)


 def _drop_db():
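
For context, a small usage sketch of the wiring added above; the username, domain and MongoDB URL are made-up values.

from pymongo import MongoClient

from utils.media import MediaCache

USERNAME, DOMAIN, USER_AGENT = "alice", "example.com", "microblog.pub (test)"  # made-up values
mongo_client = MongoClient("mongodb://localhost:27017")

DB_NAME = "{}_{}".format(USERNAME, DOMAIN.replace(".", "_"))   # -> "alice_example_com"
DB = mongo_client[DB_NAME]                                     # activities, users, ...
GRIDFS = mongo_client[f"{DB_NAME}_gridfs"]                     # separate database for media blobs
MEDIA_CACHE = MediaCache(GRIDFS, USER_AGENT)                   # imported by app.py via `from config import MEDIA_CACHE`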

(theme stylesheet, SCSS)

@@ -31,7 +31,15 @@ a:hover {
 .pcolor {
   color: $primary-color;
 }
+.lcolor {
+  color: $color-light;
+}
+.older-link, .newer-link, .older-link:hover, .newer-link:hover {
+  text-decoration: none;
+  padding: 3px;
+}
+.newer-link { float: right }
+.clear { clear: both; }
 .remote-follow-button {
   background: $color-menu-background;
   color: $color-light;

templates/index.html

@@ -34,7 +34,20 @@
 {% endif %}
 {% endfor %}

+<div class="clear">
+{% if older_than %}
+<a href="{{ config.BASE_URL }}{{ request.path }}?older_than={{older_than}}" rel="next" class="older-link lcolor"><span class="pcolor">🡨</span> Older</a>
+{% endif %}
+{% if newer_than %}
+<a href="{{ config.BASE_URL }}{{ request.path }}?newer_than={{newer_than}}" rel="prev" class="newer-link lcolor">Newer <span class="pcolor">🡪</span></a>
+{% endif %}
+</div>
+
 </div>
 </div>
 {% endblock %}
+
+{% block links %}
+{% if older_than %}<link rel="next" href="{{ config.BASE_URL }}{{ request.path }}?older_than={{older_than}}">{% endif %}
+{% if newer_than %}<link rel="prev" href="{{ config.BASE_URL }}{{ request.path }}?newer_than={{newer_than}}">{% endif %}
+{% endblock %}

(base layout template)

@@ -9,11 +9,12 @@
 <link rel="authorization_endpoint" href="{{ config.ID }}/indieauth">
 <link rel="token_endpoint" href="{{ config.ID }}/token">
 <link rel="micropub" href="{{ config.ID }}/micropub">
-<link rel="canonical" href="https://{{ config.DOMAIN }}{{ request.path }}">
+{% if not request.args.get("older_than") and not request.args.get("newer_than") %}<link rel="canonical" href="https://{{ config.DOMAIN }}{{ request.path }}">{% endif %}
 <link rel="alternate" href="{{ config.ME.url | get_url }}" title="ActivityPub profile" type="application/activity+json">
 <link rel="alternate" href="{{ config.ID }}/feed.json" type="application/json" title="JSON Feed">
 <link rel="alternate" href="{{ config.ID }}/feed.rss" type="application/rss+xml" title="RSS">
 <link rel="alternate" href="{{ config.ID }}/feed.atom" type="application/atom+xml" title="Atom 0.3">
+{% block links %}{% endblock %}
 {% if config.THEME_COLOR %}<meta name="theme-color" content="{{ config.THEME_COLOR }}">{% endif %}
 <style>{{ config.CSS | safe }}</style>
 </head>

utils/media.py (was utils/img.py)

@@ -1,20 +1,21 @@
 import base64
+import mimetypes
+from enum import Enum
 from gzip import GzipFile
 from io import BytesIO
 from typing import Any
-import mimetypes
-from enum import Enum

 import gridfs
+import piexif
 import requests
 from PIL import Image


 def load(url, user_agent):
     """Initializes a `PIL.Image` from the URL."""
-    # TODO(tsileo): user agent
     with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
         resp.raise_for_status()
+        resp.raw.decode_content = True
         return Image.open(BytesIO(resp.raw.read()))
@@ -29,9 +30,10 @@ def to_data_uri(img):
 class Kind(Enum):
     ATTACHMENT = "attachment"
     ACTOR_ICON = "actor_icon"
+    UPLOAD = "upload"


-class ImageCache(object):
+class MediaCache(object):
     def __init__(self, gridfs_db: str, user_agent: str) -> None:
         self.fs = gridfs.GridFS(gridfs_db)
         self.user_agent = user_agent
@@ -62,9 +64,8 @@ class ImageCache(object):
         # Save a thumbnail (gzipped)
         i.thumbnail((720, 720))
         with BytesIO() as buf:
-            f1 = GzipFile(mode="wb", fileobj=buf)
-            i.save(f1, format=i.format)
-            f1.close()
+            with GzipFile(mode="wb", fileobj=buf) as f1:
+                i.save(f1, format=i.format)
             buf.seek(0)
             self.fs.put(
                 buf,
@@ -81,11 +82,10 @@ class ImageCache(object):
         ) as resp:
             resp.raise_for_status()
             with BytesIO() as buf:
-                f1 = GzipFile(mode="wb", fileobj=buf)
-                for chunk in resp.iter_content():
-                    if chunk:
-                        f1.write(chunk)
-                f1.close()
+                with GzipFile(mode="wb", fileobj=buf) as f1:
+                    for chunk in resp.iter_content():
+                        if chunk:
+                            f1.write(chunk)
                 buf.seek(0)
                 self.fs.put(
                     buf,
@@ -103,9 +103,8 @@ class ImageCache(object):
         t1 = i.copy()
         t1.thumbnail((size, size))
         with BytesIO() as buf:
-            f1 = GzipFile(mode="wb", fileobj=buf)
-            t1.save(f1, format=i.format)
-            f1.close()
+            with GzipFile(mode="wb", fileobj=buf) as f1:
+                t1.save(f1, format=i.format)
             buf.seek(0)
             self.fs.put(
                 buf,
@@ -115,6 +114,30 @@ class ImageCache(object):
                 kind=Kind.ACTOR_ICON.value,
             )

+    def save_upload(self, obuf: BytesIO, filename: str) -> str:
+        # Remove EXIF metadata
+        if filename.lower().endswith(".jpg") or filename.lower().endswith(".jpeg"):
+            obuf.seek(0)
+            with BytesIO() as buf2:
+                piexif.remove(obuf.getvalue(), buf2)
+                obuf.truncate(0)
+                obuf.write(buf2.getvalue())
+
+        obuf.seek(0)
+        mtype = mimetypes.guess_type(filename)[0]
+        with BytesIO() as gbuf:
+            with GzipFile(mode="wb", fileobj=gbuf) as gzipfile:
+                gzipfile.write(obuf.getvalue())
+
+            gbuf.seek(0)
+            oid = self.fs.put(
+                gbuf,
+                content_type=mtype,
+                upload_filename=filename,
+                kind=Kind.UPLOAD.value,
+            )
+        return str(oid)
+
     def cache(self, url: str, kind: Kind) -> None:
         if kind == Kind.ACTOR_ICON:
             self.cache_actor_icon(url)
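
A hedged end-to-end sketch of the new save_upload() path, assuming a configured instance (so "from config import MEDIA_CACHE" works and MongoDB is running) and a local photo.jpg: the upload is EXIF-stripped, gzipped into GridFS, and later served by the /uploads/<oid>/<fname> route.

import gzip
from io import BytesIO

from bson.objectid import ObjectId

from config import MEDIA_CACHE   # requires a configured instance (MongoDB + config files)

with open("photo.jpg", "rb") as src:               # any local JPEG; save_upload strips its EXIF
    buf = BytesIO(src.read())

oid = MEDIA_CACHE.save_upload(buf, "photo.jpg")    # GridFS ObjectId, returned as a string
url = f"/uploads/{oid}/photo.jpg"                  # the URL api_new_note() puts in the attachment

grid_out = MEDIA_CACHE.fs.get(ObjectId(oid))
original_bytes = gzip.decompress(grid_out.read())  # blobs are stored gzip-compressed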