# microblog.pub — core/shared.py
import gzip
import json
2019-08-01 12:55:30 -05:00
import os
2019-08-23 17:24:57 -05:00
from functools import lru_cache
2019-08-01 12:55:30 -05:00
from functools import wraps
from typing import Any
import flask
from bson.objectid import ObjectId
from flask import Response
2019-08-01 12:55:30 -05:00
from flask import current_app as app
from flask import redirect
from flask import request
from flask import session
from flask import url_for
2019-07-30 15:12:20 -05:00
from flask_wtf.csrf import CSRFProtect
from little_boxes import activitypub as ap
2019-08-01 12:55:30 -05:00
from poussetaches import PousseTaches
2019-07-30 15:12:20 -05:00
import config
2019-08-01 12:55:30 -05:00
from config import DB
2019-07-30 15:15:59 -05:00
from config import ME
2019-08-01 15:00:26 -05:00
from core import activitypub
2019-08-16 15:27:59 -05:00
from core.db import find_activities
from core.meta import MetaKey
2019-09-04 16:18:02 -05:00
from core.meta import by_object_id
2019-08-16 15:27:59 -05:00
from core.meta import by_type
from core.meta import flag
from core.meta import not_deleted
2019-07-30 15:12:20 -05:00
# _Response = Union[flask.Response, werkzeug.wrappers.Response, str, Any]
_Response = Any

# Task-queue client: first URL is the poussetaches daemon, second is the
# internal host it calls back to deliver tasks to this app.
p = PousseTaches(
    os.getenv("MICROBLOGPUB_POUSSETACHES_HOST", "http://localhost:7991"),
    os.getenv("MICROBLOGPUB_INTERNAL_HOST", "http://localhost:5000"),
)

# CSRF protection is initialized against the app elsewhere (app factory).
csrf = CSRFProtect()

# Register this instance's ActivityPub backend with little_boxes.
back = activitypub.MicroblogPubBackend()
ap.use_backend(back)

# The instance owner's ActivityPub actor, built from the ME config dict.
MY_PERSON = ap.Person(**ME)
2019-08-23 17:24:57 -05:00
@lru_cache(512)
2019-08-23 16:49:33 -05:00
def build_resp(resp):
"""Encode the response to gzip if supported by the client."""
2019-08-24 03:58:35 -05:00
headers = {"Cache-Control": "max-age=0, private, must-revalidate"}
2019-08-23 16:49:33 -05:00
accept_encoding = request.headers.get("Accept-Encoding", "")
if "gzip" in accept_encoding.lower():
return (
gzip.compress(resp.encode(), compresslevel=6),
2019-08-24 04:18:01 -05:00
{**headers, "Vary": "Accept-Encoding", "Content-Encoding": "gzip"},
2019-08-23 16:49:33 -05:00
)
return resp, headers
2019-08-24 03:58:35 -05:00
def jsonify(data, content_type="application/json"):
    """Serialize *data* as JSON and wrap it in a (possibly gzipped) Response."""
    body, extra_headers = build_resp(json.dumps(data))
    final_headers = dict(extra_headers)
    final_headers["Content-Type"] = content_type
    return Response(response=body, headers=final_headers)
def htmlify(data):
    """Wrap an HTML payload in a (possibly gzipped) Response."""
    body, extra_headers = build_resp(data)
    final_headers = {**extra_headers, "Content-Type": "text/html; charset=utf-8"}
    return Response(response=body, headers=final_headers)
def activitypubify(**data):
    """Build an ActivityPub JSON Response, adding the default @context if absent."""
    data.setdefault("@context", config.DEFAULT_CTX)
    body, extra_headers = build_resp(json.dumps(data))
    final_headers = {**extra_headers, "Content-Type": "application/activity+json"}
    return Response(response=body, headers=final_headers)
def is_api_request():
    """Return True when the request's first Accept value asks for an API response."""
    accept = request.headers.get("Accept")
    if accept is None:
        return False
    first_media_type = accept.split(",")[0]
    return first_media_type == "application/json" or first_media_type in config.HEADERS
2019-08-01 12:55:30 -05:00
def add_response_headers(headers=None):
    """This decorator adds the headers passed in to the response.

    The default is ``None`` instead of ``{}``: a mutable default argument is
    shared across all calls and is a classic Python pitfall. ``None`` is
    normalized to an empty dict, so callers are unaffected.
    """
    headers = headers if headers is not None else {}

    def decorator(f):
        @wraps(f)
        def decorated_function(*args, **kwargs):
            resp = flask.make_response(f(*args, **kwargs))
            h = resp.headers
            for header, value in headers.items():
                h[header] = value
            return resp

        return decorated_function

    return decorator
def noindex(f):
    """This decorator passes X-Robots-Tag: noindex, nofollow"""
    robots_headers = {"X-Robots-Tag": "noindex, nofollow"}
    return add_response_headers(robots_headers)(f)
def login_required(f):
    """Redirect anonymous visitors to the admin login page before running the view."""

    @wraps(f)
    def decorated_function(*args, **kwargs):
        if session.get("logged_in"):
            return f(*args, **kwargs)
        return redirect(url_for("admin.admin_login", next=request.url))

    return decorated_function
def _get_ip():
    """Guess the IP address from the request. Only used for security purpose (failed logins or bad payload).

    Geoip will be returned if the "broxy" headers are set (it does Geoip
    using an offline database and append these special headers).

    Returns an ``(ip, geoip)`` tuple; *geoip* is None when the broxy headers
    are absent.
    """
    ip = request.headers.get("X-Forwarded-For", request.remote_addr)
    geoip = None
    country = request.headers.get("Broxy-Geoip-Country")
    if country:
        # Default the region to "" so a missing Broxy-Geoip-Region header
        # cannot raise a TypeError from concatenating None.
        region = request.headers.get("Broxy-Geoip-Region", "")
        geoip = country + "/" + region
    return ip, geoip
def _build_thread(data, include_children=True):  # noqa: C901
    """Collect and flatten the whole thread around the activity *data*.

    Gathers the thread root, local Create activities and cached remote
    replies, links them into a tree keyed by object id, then flattens the
    tree depth-first. Each returned node carries a ``_level`` key with its
    nesting depth, ready for template rendering.

    NOTE(review): ``include_children`` is not used anywhere in this body —
    presumably kept for caller compatibility; confirm against call sites.
    """
    # Mark the activity that was actually requested (e.g. for highlighting).
    data["_requested"] = True
    app.logger.info(f"_build_thread({data!r})")

    # Resolve the thread root id: prefer the cached root parent, then the
    # object id, then fall back to the raw remote id.
    root_id = data["meta"].get(
        MetaKey.THREAD_ROOT_PARENT.value,
        data["meta"].get(MetaKey.OBJECT_ID.value, data["meta"].get("remote_id")),
    )

    replies = [data]
    # The root Create activity itself, if stored locally.
    for dat in find_activities(
        {**by_object_id(root_id), **not_deleted(), **by_type(ap.ActivityType.CREATE)}
    ):
        replies.append(dat)
    # Local Create activities belonging to this thread.
    for dat in find_activities(
        {
            **flag(MetaKey.THREAD_ROOT_PARENT, root_id),
            **not_deleted(),
            **by_type(ap.ActivityType.CREATE),
        }
    ):
        replies.append(dat)
    # Cached remote replies, stored as bare objects rather than activities.
    for dat in DB.replies.find(
        {**flag(MetaKey.THREAD_ROOT_PARENT, root_id), **not_deleted()}
    ):
        # Make a Note/Question/... looks like a Create
        dat["meta"].update(
            {MetaKey.OBJECT_VISIBILITY.value: dat["meta"][MetaKey.VISIBILITY.value]}
        )
        dat = {
            "activity": {"object": dat["activity"]},
            "meta": dat["meta"],
            "_id": dat["_id"],
        }
        replies.append(dat)

    replies = sorted(replies, key=lambda d: d["meta"]["published"])

    # Index all the IDs in order to build a tree
    idx = {}
    replies2 = []
    for rep in replies:
        rep_id = rep["activity"]["object"]["id"]
        if rep_id in idx:
            # The same object can be collected by more than one query above;
            # keep only the first occurrence.
            continue
        idx[rep_id] = rep.copy()
        idx[rep_id]["_nodes"] = []
        replies2.append(rep)

    # Build the tree
    for rep in replies2:
        rep_id = rep["activity"]["object"]["id"]
        if rep_id == root_id:
            continue
        reply_of = ap._get_id(rep["activity"]["object"].get("inReplyTo"))
        try:
            idx[reply_of]["_nodes"].append(rep)
        except KeyError:
            # Parent was never fetched/stored; the orphaned reply is dropped.
            app.logger.info(f"{reply_of} is not there! skipping {rep}")

    # Flatten the tree
    thread = []

    def _flatten(node, level=0):
        # Depth-first walk; siblings are ordered by publication date.
        node["_level"] = level
        thread.append(node)

        for snode in sorted(
            idx[node["activity"]["object"]["id"]]["_nodes"],
            key=lambda d: d["activity"]["object"]["published"],
        ):
            _flatten(snode, level=level + 1)

    try:
        _flatten(idx[root_id])
    except KeyError:
        # Root itself is unavailable; return whatever was flattened (nothing).
        app.logger.info(f"{root_id} is not there! skipping")

    return thread
def paginated_query(db, q, limit=25, sort_key="_id"):
    """Run a cursor-based (older_than/newer_than) paginated query on *db*.

    *db* is the Mongo collection and *q* the base query dict; *q* is mutated
    in place with an ``_id`` bound when a pagination cursor is present in the
    request's query string.

    Returns an ``(items, older_than, newer_than)`` tuple where the cursors
    are stringified ObjectIds, or None when there is no further page in that
    direction.
    """
    older_than = newer_than = None
    query_sort = -1

    # First page: no pagination cursor at all in the query string.
    first_page = not request.args.get("older_than") and not request.args.get(
        "newer_than"
    )

    query_older_than = request.args.get("older_than")
    query_newer_than = request.args.get("newer_than")

    if query_older_than:
        q["_id"] = {"$lt": ObjectId(query_older_than)}
    elif query_newer_than:
        q["_id"] = {"$gt": ObjectId(query_newer_than)}
        # Flip the sort so the items closest to the cursor are fetched first.
        query_sort = 1

    # Fetch one extra item to detect whether another page exists.
    outbox_data = list(db.find(q, limit=limit + 1).sort(sort_key, query_sort))
    outbox_len = len(outbox_data)
    # Always render newest-first, whichever direction was paginated.
    outbox_data = sorted(
        outbox_data[:limit], key=lambda x: str(x[sort_key]), reverse=True
    )

    if query_older_than:
        # Coming from a newer page, so a "newer" cursor always exists.
        newer_than = str(outbox_data[0]["_id"])
        if outbox_len == limit + 1:
            older_than = str(outbox_data[-1]["_id"])
    elif query_newer_than:
        older_than = str(outbox_data[-1]["_id"])
        if outbox_len == limit + 1:
            newer_than = str(outbox_data[0]["_id"])
    elif first_page and outbox_len == limit + 1:
        older_than = str(outbox_data[-1]["_id"])

    return outbox_data, older_than, newer_than