microblog.pub/core/activitypub.py

623 lines
20 KiB
Python
Raw Normal View History

import binascii
2019-04-14 12:17:54 -05:00
import hashlib
2018-05-27 07:21:06 -05:00
import logging
2018-06-24 12:51:09 -05:00
import os
2018-05-18 13:41:41 -05:00
from datetime import datetime
from datetime import timezone
2018-06-16 15:02:10 -05:00
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from urllib.parse import urljoin
from urllib.parse import urlparse
2018-05-18 13:41:41 -05:00
from bson.objectid import ObjectId
2018-07-11 16:22:47 -05:00
from cachetools import LRUCache
from flask import url_for
2018-07-11 16:22:47 -05:00
from little_boxes import activitypub as ap
from little_boxes import strtobool
from little_boxes.activitypub import _to_list
from little_boxes.activitypub import clean_activity
from little_boxes.activitypub import format_datetime
2018-07-11 16:22:47 -05:00
from little_boxes.backend import Backend
from little_boxes.errors import ActivityGoneError
2018-05-18 13:41:41 -05:00
2018-06-16 15:02:10 -05:00
from config import BASE_URL
from config import DB
from config import EXTRA_INBOXES
2018-06-16 15:02:10 -05:00
from config import ID
from config import ME
from config import USER_AGENT
2019-08-01 15:00:26 -05:00
from core.meta import Box
2019-08-01 15:25:58 -05:00
from core.tasks import Tasks
2018-06-16 15:33:51 -05:00
2018-05-27 07:21:06 -05:00
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Type alias for a free-form metadata mapping (string keys, arbitrary values).
_NewMeta = Dict[str, Any]
2018-05-18 13:41:41 -05:00
2018-07-11 13:04:48 -05:00
# LRU cache bounded to 256 entries.
# NOTE(review): presumably keyed by actor IRI, going by the name -- confirm against its users.
ACTORS_CACHE = LRUCache(maxsize=256)
# `ap.Person` built from the `ME` config mapping (the instance's own actor).
MY_PERSON = ap.Person(**ME)
2018-07-11 13:04:48 -05:00
2018-06-16 14:24:53 -05:00
def _remove_id(doc: ap.ObjectType) -> ap.ObjectType:
    """Return a shallow copy of `doc` without MongoDB's `_id` field.

    The document passed in is left untouched.
    """
    stripped = doc.copy()
    # `pop` with a default is a no-op when the key is absent
    stripped.pop("_id", None)
    return stripped
2019-04-14 12:17:54 -05:00
def _answer_key(choice: str) -> str:
    """Return the hex SHA-1 digest of `choice` (encoded with the default UTF-8 codec).

    The digest is used as a key for per-choice counters (see `meta.question_answers`).
    """
    return hashlib.sha1(choice.encode()).hexdigest()
2019-07-17 09:14:29 -05:00
def _is_local_reply(create: ap.Create) -> bool:
    """Tell whether `create` is addressed (via `to` or `cc`) to an IRI under `BASE_URL`."""

    def _has_local_recipient(recipients) -> bool:
        # A missing/empty field is treated as "no recipient"
        return any(
            dest.startswith(BASE_URL) for dest in _to_list(recipients or [])
        )

    # `cc` is only inspected when nothing in `to` matched
    return _has_local_recipient(create.to) or _has_local_recipient(create.cc)
def save(box: Box, activity: ap.BaseActivity) -> None:
    """Custom helper for saving an activity to the DB.

    The activity is inserted in `DB.activities` along with a `meta` sub-document
    holding the initial flags (`undo`, `deleted`, `poll_answer`) and a few
    denormalized fields (visibility, server, actor ID, object ID).
    """
    visibility = ap.get_visibility(activity)
    is_public = visibility in (ap.Visibility.PUBLIC, ap.Visibility.UNLISTED)

    try:
        object_id = activity.get_object_id()
    except Exception:  # TODO(tsileo): should be ValueError, but replies trigger a KeyError on object
        object_id = None

    # The visibility of the wrapped object is only recorded for these types
    types_with_object = [
        ap.ActivityType.CREATE,
        ap.ActivityType.ANNOUNCE,
        ap.ActivityType.LIKE,
    ]
    if activity.has_type(types_with_object):
        object_visibility = ap.get_visibility(activity.get_object()).name
    else:
        object_visibility = None

    actor_id = activity.get_actor().id

    document = {
        "box": box.value,
        "activity": activity.to_dict(),
        "type": _to_list(activity.type),
        "remote_id": activity.id,
        "meta": {
            "undo": False,
            "deleted": False,
            "public": is_public,
            "server": urlparse(activity.id).netloc,
            "visibility": visibility.name,
            "actor_id": actor_id,
            "object_id": object_id,
            "object_visibility": object_visibility,
            "poll_answer": False,
        },
    }
    DB.activities.insert_one(document)
def outbox_is_blocked(actor_id: str) -> bool:
    """Tell whether the outbox holds a non-undone `Block` activity targeting `actor_id`."""
    block_query = {
        "box": Box.OUTBOX.value,
        "type": ap.ActivityType.BLOCK.value,
        "activity.object": actor_id,
        "meta.undo": False,
    }
    match = DB.activities.find_one(block_query)
    return bool(match)
def activity_url(item_id: str) -> str:
    """Build the absolute URL of the `outbox_detail` endpoint for `item_id`."""
    relative_url = url_for("outbox_detail", item_id=item_id)
    return urljoin(BASE_URL, relative_url)
def post_to_inbox(activity: ap.BaseActivity) -> None:
    """Save a received activity in the inbox and spawn the processing tasks.

    The activity is silently dropped when its actor is blocked, or when an
    activity with the same ID is already stored in the inbox.
    """
    # Check for Block activity
    actor = activity.get_actor()
    if outbox_is_blocked(actor.id):
        logger.info(
            f"actor {actor!r} is blocked, dropping the received activity {activity!r}"
        )
        return

    duplicate = DB.activities.find_one(
        {"box": Box.INBOX.value, "remote_id": activity.id}
    )
    if duplicate:
        # The activity is already in the inbox
        logger.info(f"received duplicate activity {activity!r}, dropping it")
        return

    save(Box.INBOX, activity)

    Tasks.process_new_activity(activity.id)
    logger.info(f"spawning task for {activity!r}")
    Tasks.finish_post_to_inbox(activity.id)
def post_to_outbox(activity: ap.BaseActivity) -> str:
    """Assign an ID to `activity`, save it in the outbox and spawn the follow-up tasks.

    Activities matching `ap.CREATE_TYPES` are first wrapped via `build_create()`.
    Returns the ID (URL) assigned to the saved activity.
    """
    if activity.has_type(ap.CREATE_TYPES):
        activity = activity.build_create()

    # Assign create a random ID
    random_id = binascii.hexlify(os.urandom(8)).decode("utf-8")
    activity._data["id"] = activity_url(random_id)

    if activity.has_type(ap.ActivityType.CREATE):
        # The wrapped object gets its own ID and a public URL derived from the same random ID
        wrapped = activity._data["object"]
        wrapped["id"] = urljoin(BASE_URL, url_for("outbox_activity", item_id=random_id))
        wrapped["url"] = urljoin(BASE_URL, url_for("note_by_id", note_id=random_id))
        activity.reset_object_cache()

    save(Box.OUTBOX, activity)

    new_id = activity.id
    Tasks.cache_actor(new_id)
    Tasks.finish_post_to_outbox(new_id)
    return new_id
2018-06-16 14:24:53 -05:00
class MicroblogPubBackend(Backend):
    """Implements a Little Boxes backend, backed by MongoDB.

    Lookups are answered from `DB.activities` whenever possible; anything not
    found locally is delegated to the parent `Backend` implementation.
    """

    def base_url(self) -> str:
        """Return the configured `BASE_URL`."""
        return BASE_URL

    def debug_mode(self) -> bool:
        """Return the boolean value of the `MICROBLOGPUB_DEBUG` env variable (default: false)."""
        return strtobool(os.getenv("MICROBLOGPUB_DEBUG", "false"))

    def user_agent(self) -> str:
        """Setup a custom user agent."""
        return USER_AGENT

    def extra_inboxes(self) -> List[str]:
        """Return the configured `EXTRA_INBOXES`."""
        return EXTRA_INBOXES

    def followers(self) -> List[str]:
        """Return the `actor` of every non-undone `Follow` stored in the inbox."""
        q = {
            "box": Box.INBOX.value,
            "type": ap.ActivityType.FOLLOW.value,
            "meta.undo": False,
        }
        return [doc["activity"]["actor"] for doc in DB.activities.find(q)]

    def followers_as_recipients(self) -> List[str]:
        """Return the de-duplicated inboxes of the followers.

        The shared inbox of a follower is preferred over its personal inbox;
        the data is read from the actor cached in `meta.actor`.
        """
        q = {
            "box": Box.INBOX.value,
            "type": ap.ActivityType.FOLLOW.value,
            "meta.undo": False,
        }
        recipients = set()
        for doc in DB.activities.find(q):
            actor = doc["meta"]["actor"]
            # `sharedInbox` is optional: fall back to the personal inbox when
            # the key is missing (and not only when it is empty)
            recipients.add(actor.get("sharedInbox") or actor["inbox"])
        return list(recipients)

    def following(self) -> List[str]:
        """Return the `object` of every non-undone `Follow` stored in the outbox."""
        q = {
            "box": Box.OUTBOX.value,
            "type": ap.ActivityType.FOLLOW.value,
            "meta.undo": False,
        }
        return [doc["activity"]["object"] for doc in DB.activities.find(q)]

    def parse_collection(
        self, payload: Optional[Dict[str, Any]] = None, url: Optional[str] = None
    ) -> List[str]:
        """Resolve/fetch a `Collection`/`OrderedCollection`."""
        # Resolve internal collections via MongoDB directly
        if url == ID + "/followers":
            return self.followers()
        elif url == ID + "/following":
            return self.following()

        return super().parse_collection(payload, url)

    def _fetch_iri(self, iri: str) -> ap.ObjectType:  # noqa: C901
        """Resolve `iri`, checking the local data first and falling back to HTTP.

        Raises `ActivityGoneError` when the matching local document is flagged
        as deleted.
        """
        # Shortcut if the instance actor is fetched
        if iri == ME["id"]:
            return ME

        # Internal collections handling
        # Followers
        if iri == MY_PERSON.followers:
            followers = []
            for data in DB.activities.find(
                {
                    "box": Box.INBOX.value,
                    "type": ap.ActivityType.FOLLOW.value,
                    "meta.undo": False,
                }
            ):
                followers.append(data["meta"]["actor_id"])
            return {"type": "Collection", "items": followers}

        # Following
        if iri == MY_PERSON.following:
            following = []
            for data in DB.activities.find(
                {
                    "box": Box.OUTBOX.value,
                    "type": ap.ActivityType.FOLLOW.value,
                    "meta.undo": False,
                }
            ):
                following.append(data["meta"]["object_id"])
            return {"type": "Collection", "items": following}

        # TODO(tsileo): handle the liked collection too

        # Check if the activity is owned by this server
        if iri.startswith(BASE_URL):
            is_a_note = False
            suffix = "/activity"
            if iri.endswith(suffix):
                # Only strip the trailing suffix (`str.replace` would also
                # drop any other occurrence found earlier in the IRI)
                iri = iri[: -len(suffix)]
                is_a_note = True
            data = DB.activities.find_one({"box": Box.OUTBOX.value, "remote_id": iri})
            if data and data["meta"]["deleted"]:
                raise ActivityGoneError(f"{iri} is gone")
            if data and is_a_note:
                return data["activity"]["object"]
            elif data:
                return data["activity"]
        else:
            # Check if the activity is stored in the inbox
            data = DB.activities.find_one({"remote_id": iri})
            if data:
                if data["meta"]["deleted"]:
                    raise ActivityGoneError(f"{iri} is gone")
                return data["activity"]

            # Check if the IRI is the object of a stored Create
            obj = DB.activities.find_one({"meta.object_id": iri, "type": "Create"})
            if obj:
                if obj["meta"]["deleted"]:
                    raise ActivityGoneError(f"{iri} is gone")
                return obj["meta"].get("object") or obj["activity"]["object"]

            # Check if it's cached because it's a follower
            # Remove extra info (like the key hash if any)
            cleaned_iri = iri.split("#")[0]
            actor = DB.activities.find_one(
                {
                    "meta.actor_id": cleaned_iri,
                    "type": ap.ActivityType.FOLLOW.value,
                    "meta.undo": False,
                }
            )
            # "type" check is here to skip old metadata for "old/buggy" followers
            if (
                actor
                and actor["meta"].get("actor")
                and "type" in actor["meta"]["actor"]
            ):
                return actor["meta"]["actor"]

            # Check if it's cached because it's a following
            actor2 = DB.activities.find_one(
                {
                    "meta.object_id": cleaned_iri,
                    "type": ap.ActivityType.FOLLOW.value,
                    "meta.undo": False,
                }
            )
            if (
                actor2
                and actor2["meta"].get("object")
                and "type" in actor2["meta"]["object"]
            ):
                return actor2["meta"]["object"]

        # Fetch the URL via HTTP
        logger.info(f"dereference {iri} via HTTP")
        return super().fetch_iri(iri)

    def fetch_iri(self, iri: str, no_cache=False) -> ap.ObjectType:
        """Fetch `iri`; the local DB lookup is bypassed when `no_cache` is set."""
        if not no_cache:
            # Fetch the activity by checking the local DB first
            data = self._fetch_iri(iri)
        else:
            data = super().fetch_iri(iri)

        logger.debug(f"_fetch_iri({iri!r}) == {data!r}")

        return data

    def set_post_to_remote_inbox(self, cb):
        """Store `cb` as the `post_to_remote_inbox_cb` attribute."""
        self.post_to_remote_inbox_cb = cb

    def _handle_replies_delete(
        self, as_actor: ap.Person, in_reply_to: Optional[str]
    ) -> None:
        """Decrement the reply counters of the object identified by `in_reply_to`.

        Nothing is done when `in_reply_to` is empty.
        """
        if not in_reply_to:
            # Not a reply, no counter to update (this used to be a `pass`,
            # which let the update below run with an empty ID)
            return

        DB.activities.update_one(
            {"activity.object.id": in_reply_to},
            {"$inc": {"meta.count_reply": -1, "meta.count_direct_reply": -1}},
        )

    def _process_question_reply(self, create: ap.Create, question: ap.Question) -> None:
        """Register the vote carried by `create` for `question`.

        The vote is ignored (and only logged) when the choice is unknown or
        when the actor already answered the question.
        """
        choice = create.get_object().name

        # Ensure it's a valid choice (a question without any choice has no valid answer)
        choices = question._data.get("oneOf", question.anyOf) or []
        if choice not in [c["name"] for c in choices]:
            logger.info("invalid choice")
            return

        # Check for duplicate votes
        if DB.activities.find_one(
            {
                "activity.object.actor": create.get_actor().id,
                "meta.answer_to": question.id,
            }
        ):
            logger.info("duplicate response")
            return

        # Update the DB
        answer_key = _answer_key(choice)

        # Bump the counters stored along with the question...
        DB.activities.update_one(
            {"activity.object.id": question.id},
            {
                "$inc": {
                    "meta.question_replies": 1,
                    f"meta.question_answers.{answer_key}": 1,
                }
            },
        )

        # ...and flag the reply itself as a poll answer
        DB.activities.update_one(
            {"remote_id": create.id},
            {
                "$set": {
                    "meta.answer_to": question.id,
                    "meta.stream": False,
                    "meta.poll_answer": True,
                }
            },
        )

        return None

    def _handle_replies(self, as_actor: ap.Person, create: ap.Create) -> None:
        """Go up to the root reply, save the unknown parents in the `REPLIES` box and set the
        "meta.thread_root_parent" key to make it easy to query a whole thread.

        Non-public replies to a `Question` are handled as poll votes instead
        and are not threaded.
        """
        in_reply_to = create.get_object().get_in_reply_to()
        if not in_reply_to:
            return

        new_threads = []
        root_reply = in_reply_to
        reply = ap.fetch_remote_activity(root_reply)
        # FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that)
        # FIXME(tsileo): can be a 403 too, in this case what to do? not error at least

        # Ensure this is a local reply, of a question, with a direct "to" addressing
        if (
            reply.id.startswith(BASE_URL)
            and reply.has_type(ap.ActivityType.QUESTION.value)
            and _is_local_reply(create)
            and not create.is_public()
        ):
            return self._process_question_reply(create, reply)
        elif (
            create.id.startswith(BASE_URL)
            and reply.has_type(ap.ActivityType.QUESTION.value)
            and not create.is_public()
        ):
            # Keep track of our own votes
            DB.activities.update_one(
                {"activity.object.id": reply.id, "box": "inbox"},
                {"$set": {"meta.voted_for": create.get_object().name}},
            )
            return None

        creply = DB.activities.find_one_and_update(
            {"activity.object.id": in_reply_to},
            {"$inc": {"meta.count_reply": 1, "meta.count_direct_reply": 1}},
        )
        if not creply:
            # It means the activity is not in the inbox, and not in the outbox, we want to save it
            save(Box.REPLIES, reply)
            new_threads.append(reply.id)
            # TODO(tsileo): parses the replies collection and import the replies?

        # Walk up the thread until an object without `inReplyTo` is reached
        while reply is not None:
            in_reply_to = reply.get_in_reply_to()
            if not in_reply_to:
                break
            root_reply = in_reply_to
            reply = ap.fetch_remote_activity(root_reply)
            # FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that)
            q = {"activity.object.id": root_reply}
            # Existence check (`find_one` replaces the deprecated `Collection.count`)
            if not DB.activities.find_one(q):
                save(Box.REPLIES, reply)
                new_threads.append(reply.id)

        DB.activities.update_one(
            {"remote_id": create.id}, {"$set": {"meta.thread_root_parent": root_reply}}
        )
        # `update_many` is needed to tag every newly saved parent: the legacy
        # `update` only touches the first matching document by default
        DB.activities.update_many(
            {"box": Box.REPLIES.value, "remote_id": {"$in": new_threads}},
            {"$set": {"meta.thread_root_parent": root_reply}},
        )
2018-06-17 13:51:23 -05:00
2018-05-18 13:41:41 -05:00
def embed_collection(total_items, first_page_id):
    """Helper creating a root OrderedCollection with a link to the first page.

    `first_page_id` is used as the collection ID, and suffixed with
    `?page=first` to build the link to the first page.
    """
    collection = {"type": ap.ActivityType.ORDERED_COLLECTION.value}
    collection["totalItems"] = total_items
    collection["first"] = f"{first_page_id}?page=first"
    collection["id"] = first_page_id
    return collection
def simple_build_ordered_collection(col_name, data):
    """Wrap `data` in a single (non-paginated) OrderedCollection named `col_name`."""
    collection_id = "/".join((BASE_URL, col_name))
    item_count = len(data)
    return {
        "@context": ap.COLLECTION_CTX,
        "id": collection_id,
        "totalItems": item_count,
        "type": ap.ActivityType.ORDERED_COLLECTION.value,
        "orderedItems": data,
    }
2018-06-16 14:24:53 -05:00
def build_ordered_collection(
    col, q=None, cursor=None, map_func=None, limit=50, col_name=None, first_page=False
):
    """Helper for building an OrderedCollection from a MongoDB query (with pagination support).

    Args:
        col: the MongoDB collection to query.
        q: optional query filter (left untouched, a copy is used internally).
        cursor: optional string `_id` of the last item already seen; only
            older documents (`_id` lower than the cursor) are returned.
        map_func: optional callable applied to each document (once its `_id`
            is removed).
        limit: maximum number of items per page.
        col_name: name used to build the collection URL (defaults to `col.name`).
        first_page: without a cursor, return the embedded first page instead
            of the root collection; with a cursor the page is returned as-is.

    Returns:
        An `OrderedCollection` dict when there is no cursor (or its first page
        when `first_page` is set), an `OrderedCollectionPage` dict otherwise.

    NOTE: `totalItems` is computed with the cursor filter applied, so when a
    cursor is given it only counts the documents older than the cursor.
    """
    col_name = col_name or col.name
    collection_id = f"{BASE_URL}/{col_name}"

    # Work on a copy: the cursor filter must not leak into the caller's dict
    q = dict(q) if q else {}
    if cursor:
        q["_id"] = {"$lt": ObjectId(cursor)}
    data = list(col.find(q, limit=limit).sort("_id", -1))

    if not data:
        # Returns an empty page if there's a cursor
        if cursor:
            return {
                "@context": ap.COLLECTION_CTX,
                "type": ap.ActivityType.ORDERED_COLLECTION_PAGE.value,
                "id": f"{collection_id}?cursor={cursor}",
                "partOf": collection_id,
                "totalItems": 0,
                "orderedItems": [],
            }
        return {
            "@context": ap.COLLECTION_CTX,
            "id": collection_id,
            "totalItems": 0,
            "type": ap.ActivityType.ORDERED_COLLECTION.value,
            "orderedItems": [],
        }

    # The cursors must be read before the `_id` fields are stripped
    start_cursor = str(data[0]["_id"])
    next_page_cursor = str(data[-1]["_id"])
    total_items = col.find(q).count()

    data = [_remove_id(doc) for doc in data]
    if map_func:
        data = [map_func(doc) for doc in data]

    # A full page means there may be more items to fetch
    has_next_page = len(data) == limit
    next_page_id = f"{collection_id}?cursor={next_page_cursor}"

    # No cursor, this is the first page and we return an OrderedCollection
    if not cursor:
        resp = {
            "@context": ap.COLLECTION_CTX,
            "id": collection_id,
            "totalItems": total_items,
            "type": ap.ActivityType.ORDERED_COLLECTION.value,
            "first": {
                "id": f"{collection_id}?cursor={start_cursor}",
                "orderedItems": data,
                "partOf": collection_id,
                "totalItems": total_items,
                "type": ap.ActivityType.ORDERED_COLLECTION_PAGE.value,
            },
        }

        if has_next_page:
            resp["first"]["next"] = next_page_id

        if first_page:
            return resp["first"]

        return resp

    # If there's a cursor, then we return an OrderedCollectionPage
    resp = {
        "@context": ap.COLLECTION_CTX,
        "type": ap.ActivityType.ORDERED_COLLECTION_PAGE.value,
        "id": f"{collection_id}?cursor={start_cursor}",
        "totalItems": total_items,
        "partOf": collection_id,
        "orderedItems": data,
    }
    if has_next_page:
        resp["next"] = next_page_id

    # `resp` is already a page here (it has no "first" key to unwrap), so it
    # is returned as-is whether or not `first_page` is set

    # XXX(tsileo): implements prev with prev=<first item cursor>?
    return resp
def _add_answers_to_question(raw_doc: Dict[str, Any]) -> None:
    """Embed the vote counters in the choices of a `Create` wrapping a `Question`.

    The activity stored in `raw_doc["activity"]` is updated in place:
    each choice (`oneOf`, or `anyOf`) gets a `replies` collection whose
    `totalItems` comes from `meta.question_answers`, and the question is
    flagged as `closed` once its `endTime` is reached.

    Documents that are not a `Create` of an embedded `Question` are left
    untouched.
    """
    activity = raw_doc["activity"]
    if not ap._has_type(activity["type"], ap.ActivityType.CREATE):
        return

    question = activity.get("object")
    # The object may be a bare IRI (string) instead of an embedded document
    if not isinstance(question, dict):
        return
    if not ap._has_type(question["type"], ap.ActivityType.QUESTION):
        return

    # A question without `oneOf`/`anyOf` has no choice to update
    choices = question.get("oneOf", question.get("anyOf")) or []
    for choice in choices:
        choice["replies"] = {
            "type": ap.ActivityType.COLLECTION.value,
            "totalItems": raw_doc["meta"]
            .get("question_answers", {})
            .get(_answer_key(choice["name"]), 0),
        }

    # `endTime` is optional, a question without it is never flagged as closed here
    end_time = question.get("endTime")
    now = datetime.now(timezone.utc)
    # NOTE(review): this is a string comparison, it assumes `endTime` uses the
    # same format as `format_datetime` -- confirm
    if end_time and format_datetime(now) >= end_time:
        question["closed"] = end_time
def add_extra_collection(raw_doc: Dict[str, Any]) -> Dict[str, Any]:
    """Embed the `replies`, `likes` and `shares` collections in the object of a `Create`.

    The counters are read from the `meta` sub-document (defaulting to 0), and
    `raw_doc` is updated in place and returned. Any other activity type is
    returned untouched.
    """
    if raw_doc["activity"]["type"] != ap.ActivityType.CREATE.value:
        return raw_doc

    replies = embed_collection(
        raw_doc.get("meta", {}).get("count_direct_reply", 0),
        f'{raw_doc["remote_id"]}/replies',
    )
    raw_doc["activity"]["object"]["replies"] = replies

    likes = embed_collection(
        raw_doc.get("meta", {}).get("count_like", 0),
        f'{raw_doc["remote_id"]}/likes',
    )
    raw_doc["activity"]["object"]["likes"] = likes

    shares = embed_collection(
        raw_doc.get("meta", {}).get("count_boost", 0),
        f'{raw_doc["remote_id"]}/shares',
    )
    raw_doc["activity"]["object"]["shares"] = shares

    return raw_doc
def remove_context(activity: Dict[str, Any]) -> Dict[str, Any]:
    """Drop the `@context` key of `activity` (in place) and return the same dict."""
    # No-op when the key is absent
    activity.pop("@context", None)
    return activity
def activity_from_doc(raw_doc: Dict[str, Any], embed: bool = False) -> Dict[str, Any]:
    """Build the activity to render from a raw DB document.

    The extra collections are embedded, the activity goes through
    `clean_activity`, and the poll results are added. The `@context` is
    removed when `embed` is set.
    """
    doc = add_extra_collection(raw_doc)
    activity = clean_activity(doc["activity"])

    # Handle Questions
    # TODO(tsileo): what about object embedded by ID/URL?
    _add_answers_to_question(doc)

    return remove_context(activity) if embed else activity