Store uploads in MongoDB too, start pagination

This commit is contained in:
Thomas Sileo 2018-07-06 23:15:49 +02:00
parent 0d7a1b9b5a
commit acafc1cc85
6 changed files with 138 additions and 55 deletions

app.py (112 changes)

@@ -1,5 +1,4 @@
import binascii
import hashlib
import json
import logging
import mimetypes
@@ -8,15 +7,16 @@ import urllib
from datetime import datetime
from datetime import timezone
from functools import wraps
from io import BytesIO
from typing import Any
from typing import Dict
from typing import Optional
from typing import Tuple
from urllib.parse import urlencode
from urllib.parse import urlparse
import bleach
import mf2py
import piexif
import pymongo
import timeago
from bson.objectid import ObjectId
@@ -37,24 +37,22 @@ from passlib.hash import bcrypt
from u2flib_server import u2f
from werkzeug.utils import secure_filename
import activitypub
import config
from activitypub import Box
from activitypub import embed_collection
from config import USER_AGENT
from config import ADMIN_API_KEY
from config import BASE_URL
from config import DB
from config import DEBUG_MODE
from config import DOMAIN
from config import GRIDFS
from config import HEADERS
from config import ICON_URL
from config import ID
from config import JWT
from config import KEY
from config import ME
from config import MEDIA_CACHE
from config import PASS
from config import USERNAME
from config import VERSION
@@ -73,13 +71,9 @@ from little_boxes.httpsig import HTTPSigAuth
from little_boxes.httpsig import verify_request
from little_boxes.webfinger import get_actor_url
from little_boxes.webfinger import get_remote_follow_template
from utils.img import ImageCache
from utils.img import Kind
from utils.key import get_secret_key
from utils.media import Kind
from utils.object_service import ObjectService
from typing import Optional
IMAGE_CACHE = ImageCache(GRIDFS, USER_AGENT)
OBJECT_SERVICE = ACTOR_SERVICE = ObjectService()
@@ -198,13 +192,13 @@ def _get_file_url(url, size, kind):
if cached:
return cached
doc = IMAGE_CACHE.get_file(url, size, kind)
doc = MEDIA_CACHE.get_file(url, size, kind)
if doc:
u = f"/img/{str(doc._id)}"
u = f"/media/{str(doc._id)}"
_GRIDFS_CACHE[k] = u
return u
IMAGE_CACHE.cache(url, kind)
MEDIA_CACHE.cache(url, kind)
return _get_file_url(url, size, kind)
@@ -395,10 +389,35 @@ def handle_activitypub_error(error):
# App routes
ROBOTS_TXT = """User-agent: *
Disallow: /admin/
Disallow: /static/
Disallow: /media/
Disallow: /uploads/"""
@app.route("/img/<img_id>")
def serve_img(img_id):
f = IMAGE_CACHE.fs.get(ObjectId(img_id))
@app.route("/robots.txt")
def robots_txt():
return Response(response=ROBOTS_TXT, headers={"Content-Type": "text/plain"})
@app.route("/media/<media_id>")
def serve_media(media_id):
f = MEDIA_CACHE.fs.get(ObjectId(media_id))
resp = app.response_class(f, direct_passthrough=True, mimetype=f.content_type)
resp.headers.set("Content-Length", f.length)
resp.headers.set("ETag", f.md5)
resp.headers.set(
"Last-Modified", f.uploadDate.strftime("%a, %d %b %Y %H:%M:%S GMT")
)
resp.headers.set("Cache-Control", "public,max-age=31536000,immutable")
resp.headers.set("Content-Encoding", "gzip")
return resp
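
Media files are stored gzip-compressed in GridFS (see utils/media.py below), so this route streams the stored bytes as-is and advertises Content-Encoding: gzip, leaving decompression to the client. A minimal fetch sketch, assuming a placeholder host and media id; requests, like a browser, honors Content-Encoding and hands back the decoded bytes:

    import requests

    # Placeholder URL and ObjectId; any client that honors Content-Encoding
    # (browsers, requests) gunzips the body transparently.
    resp = requests.get("https://example.com/media/5b3f00000000000000000000")
    resp.raise_for_status()
    data = resp.content  # already decompressed by requests
    print(resp.headers["Content-Type"], len(data))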
@app.route("/uploads/<oid>/<fname>")
def serve_uploads(oid, fname):
f = MEDIA_CACHE.fs.get(ObjectId(oid))
resp = app.response_class(f, direct_passthrough=True, mimetype=f.content_type)
resp.headers.set("Content-Length", f.length)
resp.headers.set("ETag", f.md5)
@@ -560,12 +579,12 @@ def tmp_migrate3():
activity = ap.parse_activity(activity["activity"])
actor = activity.get_actor()
if actor.icon:
IMAGE_CACHE.cache(actor.icon["url"], Kind.ACTOR_ICON)
MEDIA_CACHE.cache(actor.icon["url"], Kind.ACTOR_ICON)
if activity.type == ActivityType.CREATE.value:
for attachment in activity.get_object()._data.get("attachment", []):
IMAGE_CACHE.cache(attachment["url"], Kind.ATTACHMENT)
except:
app.logger.exception('failed')
MEDIA_CACHE.cache(attachment["url"], Kind.ATTACHMENT)
except Exception:
app.logger.exception("failed")
return "Done"
@@ -574,8 +593,10 @@ def index():
if is_api_request():
return jsonify(**ME)
# FIXME(tsileo): implements pagination, also for the followers/following page
limit = 50
older_than = newer_than = None
query_sort = -1
first_page = not request.args.get('older_than') and not request.args.get('newer_than')
limit = 5
q = {
"box": Box.OUTBOX.value,
"type": {"$in": [ActivityType.CREATE.value, ActivityType.ANNOUNCE.value]},
@@ -583,16 +604,35 @@
"meta.deleted": False,
"meta.undo": False,
}
c = request.args.get("cursor")
if c:
q["_id"] = {"$lt": ObjectId(c)}
query_older_than = request.args.get("older_than")
query_newer_than = request.args.get("newer_than")
if query_older_than:
q["_id"] = {"$lt": ObjectId(query_older_than)}
elif query_newer_than:
q["_id"] = {"$gt": ObjectId(query_newer_than)}
query_sort = 1
outbox_data = list(DB.activities.find(q, limit=limit).sort("_id", -1))
cursor = None
if outbox_data and len(outbox_data) == limit:
cursor = str(outbox_data[-1]["_id"])
outbox_data = list(DB.activities.find(q, limit=limit+1).sort("_id", query_sort))
outbox_len = len(outbox_data)
outbox_data = sorted(outbox_data[:limit], key=lambda x: str(x["_id"]), reverse=True)
return render_template("index.html", outbox_data=outbox_data, cursor=cursor)
if query_older_than:
newer_than = str(outbox_data[0]["_id"])
if outbox_len == limit + 1:
older_than = str(outbox_data[-1]["_id"])
elif query_newer_than:
older_than = str(outbox_data[-1]["_id"])
if outbox_len == limit + 1:
newer_than = str(outbox_data[0]["_id"])
elif first_page and outbox_len == limit + 1:
older_than = str(outbox_data[-1]["_id"])
return render_template(
"index.html",
outbox_data=outbox_data,
older_than=older_than,
newer_than=newer_than,
)
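
The new index() pages through the outbox with ObjectId cursors: it fetches limit + 1 documents so that the presence of the extra row signals another page, trims and re-sorts the page newest-first, and derives the older_than/newer_than cursors from the boundary ids. A standalone sketch of the same technique against a pymongo collection (function and variable names are illustrative):

    from bson.objectid import ObjectId

    def paginate(collection, query, limit=5, older_than=None, newer_than=None):
        sort = -1  # newest first by default
        if older_than:
            query["_id"] = {"$lt": ObjectId(older_than)}
        elif newer_than:
            query["_id"] = {"$gt": ObjectId(newer_than)}
            sort = 1  # walk forward from the cursor, re-sorted below
        docs = list(collection.find(query, limit=limit + 1).sort("_id", sort))
        has_more = len(docs) == limit + 1
        page = sorted(docs[:limit], key=lambda d: str(d["_id"]), reverse=True)
        older = newer = None
        if page and older_than:      # arrived via an "older" link
            newer = str(page[0]["_id"])
            if has_more:
                older = str(page[-1]["_id"])
        elif page and newer_than:    # arrived via a "newer" link
            older = str(page[-1]["_id"])
            if has_more:
                newer = str(page[0]["_id"])
        elif page and has_more:      # first page
            older = str(page[-1]["_id"])
        return page, older, newer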
@app.route("/with_replies")
@@ -1352,21 +1392,17 @@ def api_new_note():
if "file" in request.files:
file = request.files["file"]
rfilename = secure_filename(file.filename)
prefix = hashlib.sha256(os.urandom(32)).hexdigest()[:6]
with BytesIO() as buf:
file.save(buf)
oid = MEDIA_CACHE.save_upload(buf, rfilename)
mtype = mimetypes.guess_type(rfilename)[0]
filename = f"{prefix}_{rfilename}"
file.save(os.path.join("static", "media", filename))
# Remove EXIF metadata
if filename.lower().endswith(".jpg") or filename.lower().endswith(".jpeg"):
piexif.remove(os.path.join("static", "media", filename))
raw_note["attachment"] = [
{
"mediaType": mtype,
"name": rfilename,
"type": "Document",
"url": BASE_URL + f"/static/media/{filename}",
"url": f"{BASE_URL}/uploads/{oid}/{rfilename}",
}
]
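
With uploads now stored through MEDIA_CACHE.save_upload, the attachment URL points at the new /uploads/<oid>/<fname> route instead of a file under static/media. A hedged usage sketch; the /api/new_note path and bearer-token auth are assumptions suggested by the ADMIN_API_KEY import above, not shown in this diff:

    import requests

    resp = requests.post(
        "https://example.com/api/new_note",  # hypothetical instance URL
        headers={"Authorization": "Bearer <ADMIN_API_KEY>"},
        data={"content": "Hello with a picture"},
        files={"file": ("photo.jpg", open("photo.jpg", "rb"), "image/jpeg")},
    )
    resp.raise_for_status()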

config.py

@@ -14,6 +14,7 @@ from little_boxes import strtobool
from utils.key import KEY_DIR
from utils.key import get_key
from utils.key import get_secret_key
from utils.media import MediaCache
class ThemeStyle(Enum):
@@ -98,6 +99,7 @@ mongo_client = MongoClient(
DB_NAME = "{}_{}".format(USERNAME, DOMAIN.replace(".", "_"))
DB = mongo_client[DB_NAME]
GRIDFS = mongo_client[f"{DB_NAME}_gridfs"]
MEDIA_CACHE = MediaCache(GRIDFS, USER_AGENT)
def _drop_db():

sass/base_theme.scss

@@ -31,7 +31,15 @@ a:hover {
.pcolor {
color: $primary-color;
}
.lcolor {
color: $color-light;
}
.older-link, .newer-link, .older-link:hover, .newer-link:hover {
text-decoration: none;
padding: 3px;
}
.newer-link { float: right }
.clear { clear: both; }
.remote-follow-button {
background: $color-menu-background;
color: $color-light;

templates/index.html

@@ -34,7 +34,20 @@
{% endif %}
{% endfor %}
<div class="clear">
{% if older_than %}
<a href="{{ config.BASE_URL }}{{ request.path }}?older_than={{older_than}}" rel="next" class="older-link lcolor"><span class="pcolor">🡨</span> Older</a>
{% endif %}
{% if newer_than %}
<a href="{{ config.BASE_URL }}{{ request.path }}?newer_than={{newer_than}}" rel="prev" class="newer-link lcolor">Newer <span class="pcolor">🡪</span></a>
{% endif %}
</div>
</div>
</div>
{% endblock %}
{% block links %}
{% if older_than %}<link rel="next" href="{{ config.BASE_URL }}{{ request.path }}?older_than={{older_than}}">{% endif %}
{% if newer_than %}<link rel="prev" href="{{ config.BASE_URL }}{{ request.path }}?newer_than={{newer_than}}">{% endif %}
{% endblock %}

templates/layout.html

@@ -9,11 +9,12 @@
<link rel="authorization_endpoint" href="{{ config.ID }}/indieauth">
<link rel="token_endpoint" href="{{ config.ID }}/token">
<link rel="micropub" href="{{ config.ID }}/micropub">
<link rel="canonical" href="https://{{ config.DOMAIN }}{{ request.path }}">
{% if not request.args.get("older_than") and not request.args.get("newer_than") %}<link rel="canonical" href="https://{{ config.DOMAIN }}{{ request.path }}">{% endif %}
<link rel="alternate" href="{{ config.ME.url | get_url }}" title="ActivityPub profile" type="application/activity+json">
<link rel="alternate" href="{{ config.ID }}/feed.json" type="application/json" title="JSON Feed">
<link rel="alternate" href="{{ config.ID }}/feed.rss" type="application/rss+xml" title="RSS">
<link rel="alternate" href="{{ config.ID }}/feed.atom" type="application/atom+xml" title="Atom 0.3">
{% block links %}{% endblock %}
{% if config.THEME_COLOR %}<meta name="theme-color" content="{{ config.THEME_COLOR }}">{% endif %}
<style>{{ config.CSS | safe }}</style>
</head>

utils/media.py

@@ -1,20 +1,21 @@
import base64
import mimetypes
from enum import Enum
from gzip import GzipFile
from io import BytesIO
from typing import Any
import mimetypes
from enum import Enum
import gridfs
import piexif
import requests
from PIL import Image
def load(url, user_agent):
"""Initializes a `PIL.Image` from the URL."""
# TODO(tsileo): user agent
with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
resp.raise_for_status()
resp.raw.decode_content = True
return Image.open(BytesIO(resp.raw.read()))
@@ -29,9 +30,10 @@ def to_data_uri(img):
class Kind(Enum):
ATTACHMENT = "attachment"
ACTOR_ICON = "actor_icon"
UPLOAD = "upload"
class ImageCache(object):
class MediaCache(object):
def __init__(self, gridfs_db: str, user_agent: str) -> None:
self.fs = gridfs.GridFS(gridfs_db)
self.user_agent = user_agent
@@ -62,9 +64,8 @@ class ImageCache(object):
# Save a thumbnail (gzipped)
i.thumbnail((720, 720))
with BytesIO() as buf:
f1 = GzipFile(mode="wb", fileobj=buf)
with GzipFile(mode="wb", fileobj=buf) as f1:
i.save(f1, format=i.format)
f1.close()
buf.seek(0)
self.fs.put(
buf,
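
The same fix appears three times in this file: GzipFile only writes the gzip trailer (CRC and size) when it is closed, so the buffer must not be read before the file object closes. The with block guarantees that, and also closes on exceptions, which the old explicit f1.close() did not. A round-trip sketch:

    import gzip
    from gzip import GzipFile
    from io import BytesIO

    payload = b"hello media cache"
    with BytesIO() as buf:
        with GzipFile(mode="wb", fileobj=buf) as gz:
            gz.write(payload)
        # gz is closed here, so buf now holds a complete gzip stream
        buf.seek(0)
        assert gzip.decompress(buf.read()) == payload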
@@ -81,11 +82,10 @@
) as resp:
resp.raise_for_status()
with BytesIO() as buf:
f1 = GzipFile(mode="wb", fileobj=buf)
with GzipFile(mode="wb", fileobj=buf) as f1:
for chunk in resp.iter_content():
if chunk:
f1.write(chunk)
f1.close()
buf.seek(0)
self.fs.put(
buf,
@@ -103,9 +103,8 @@ class ImageCache(object):
t1 = i.copy()
t1.thumbnail((size, size))
with BytesIO() as buf:
f1 = GzipFile(mode="wb", fileobj=buf)
with GzipFile(mode="wb", fileobj=buf) as f1:
t1.save(f1, format=i.format)
f1.close()
buf.seek(0)
self.fs.put(
buf,
@@ -115,6 +114,30 @@ class ImageCache(object):
kind=Kind.ACTOR_ICON.value,
)
def save_upload(self, obuf: BytesIO, filename: str) -> str:
# Remove EXIF metadata
if filename.lower().endswith(".jpg") or filename.lower().endswith(".jpeg"):
obuf.seek(0)
with BytesIO() as buf2:
piexif.remove(obuf.getvalue(), buf2)
obuf.truncate(0)
obuf.write(buf2.getvalue())
obuf.seek(0)
mtype = mimetypes.guess_type(filename)[0]
with BytesIO() as gbuf:
with GzipFile(mode="wb", fileobj=gbuf) as gzipfile:
gzipfile.write(obuf.getvalue())
gbuf.seek(0)
oid = self.fs.put(
gbuf,
content_type=mtype,
upload_filename=filename,
kind=Kind.UPLOAD.value,
)
return str(oid)
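
save_upload strips EXIF metadata from JPEGs, gzips the payload, and returns the GridFS ObjectId as a string; the /uploads/ route streams it back with Content-Encoding: gzip. A sketch of reading an upload back directly from GridFS (the same fs instance), decompressing manually since no HTTP layer is involved:

    import gzip
    from bson.objectid import ObjectId

    def load_upload(fs, oid: str) -> bytes:
        gridout = fs.get(ObjectId(oid))  # fs is a gridfs.GridFS
        return gzip.decompress(gridout.read())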
def cache(self, url: str, kind: Kind) -> None:
if kind == Kind.ACTOR_ICON:
self.cache_actor_icon(url)