microblog.pub/activitypub.py

481 lines
16 KiB
Python
Raw Normal View History

2018-05-27 07:21:06 -05:00
import logging
2018-05-18 13:41:41 -05:00
from datetime import datetime
2018-06-16 15:02:10 -05:00
from typing import Any
from typing import Dict
from typing import List
from typing import Optional
from typing import Union
2018-05-18 13:41:41 -05:00
from bson.objectid import ObjectId
from feedgen.feed import FeedGenerator
2018-06-16 15:02:10 -05:00
from html2text import html2text
2018-05-18 13:41:41 -05:00
2018-06-16 15:02:10 -05:00
import tasks
from config import BASE_URL
from config import DB
from config import ID
from config import ME
from config import USER_AGENT
from config import USERNAME
2018-06-16 14:24:53 -05:00
from little_boxes import activitypub as ap
from little_boxes.backend import Backend
from little_boxes.collection import parse_collection as ap_parse_collection
2018-06-16 15:33:51 -05:00
from little_boxes.errors import Error
2018-06-17 13:51:23 -05:00
from little_boxes.errors import ActivityNotFoundError
2018-06-16 15:33:51 -05:00
2018-05-27 07:21:06 -05:00
logger = logging.getLogger(__name__)
2018-05-18 13:41:41 -05:00
2018-06-16 14:24:53 -05:00
def _remove_id(doc: ap.ObjectType) -> ap.ObjectType:
2018-05-27 15:30:43 -05:00
"""Helper for removing MongoDB's `_id` field."""
2018-05-18 13:41:41 -05:00
doc = doc.copy()
2018-06-16 14:24:53 -05:00
if "_id" in doc:
del (doc["_id"])
2018-05-18 13:41:41 -05:00
return doc
def _to_list(data: Union[List[Any], Any]) -> List[Any]:
2018-05-27 15:30:43 -05:00
"""Helper to convert fields that can be either an object or a list of objects to a list of object."""
2018-05-18 13:41:41 -05:00
if isinstance(data, list):
return data
return [data]
2018-06-16 15:33:51 -05:00
def ensure_it_is_me(f):
    """Method decorator rejecting calls made on behalf of any actor but `ME`.

    The decorated method is expected to receive the acting actor as its
    `as_actor` argument (second positional argument, right after `self`, or
    passed by keyword). If that actor's `id` differs from `ME["id"]`, an
    `Error("unexpected actor")` is raised and the method is not called.
    """
    # Imported locally so the decorator does not depend on a module-level import
    from functools import wraps

    @wraps(f)  # keep the wrapped method's name/docstring for debugging
    def wrapper(*args, **kwargs):
        # Support both positional and keyword passing of `as_actor`
        as_actor = kwargs["as_actor"] if "as_actor" in kwargs else args[1]
        if as_actor.id != ME["id"]:
            raise Error("unexpected actor")
        return f(*args, **kwargs)

    return wrapper
2018-06-16 14:24:53 -05:00
class MicroblogPubBackend(Backend):
    """`Backend` implementation that persists ActivityPub state in MongoDB.

    Activities are stored in `DB.inbox` / `DB.outbox` as documents shaped
    ``{"activity", "type", "remote_id", "meta"}``; followers and followings
    are tracked in `DB.followers` / `DB.following` keyed by `remote_actor`.

    Methods decorated with `ensure_it_is_me` raise `Error` when `as_actor`
    is not the local actor (`ME`).
    """

    def user_agent(self) -> str:
        """Return the configured `USER_AGENT`."""
        return USER_AGENT

    def base_url(self) -> str:
        """Return the configured `BASE_URL`."""
        return BASE_URL

    def activity_url(self, obj_id):
        """Return the outbox URL of the activity identified by `obj_id`."""
        return f"{BASE_URL}/outbox/{obj_id}"

    @ensure_it_is_me
    def outbox_new(self, as_actor: ap.Person, activity: ap.BaseActivity) -> None:
        """Insert `activity` into the outbox collection with fresh meta flags."""
        DB.outbox.insert_one(
            {
                "activity": activity.to_dict(),
                "type": activity.type,
                "remote_id": activity.id,
                "meta": {"undo": False, "deleted": False},
            }
        )

    @ensure_it_is_me
    def outbox_is_blocked(self, as_actor: ap.Person, actor_id: str) -> bool:
        """Return True if the outbox holds a non-undone Block targeting `actor_id`."""
        return bool(
            DB.outbox.find_one(
                {
                    "type": ap.ActivityType.BLOCK.value,
                    "activity.object": actor_id,
                    "meta.undo": False,
                }
            )
        )

    def fetch_iri(self, iri: str) -> ap.ObjectType:
        """Resolve `iri`, preferring local data over an HTTP fetch.

        Lookup order: the local actor (`ME`), then the outbox for IRIs under
        `BASE_URL` or the inbox for any other IRI, and finally the parent
        class' `fetch_iri`.
        """
        if iri == ME["id"]:
            return ME

        # IRIs under BASE_URL are owned by this server (outbox); anything else
        # may already be stored in the inbox.
        collection = DB.outbox if iri.startswith(BASE_URL) else DB.inbox
        data = collection.find_one({"remote_id": iri})
        if data:
            return data["activity"]

        # Fetch the URL via HTTP
        return super().fetch_iri(iri)

    @ensure_it_is_me
    def inbox_check_duplicate(self, as_actor: ap.Person, iri: str) -> bool:
        """Return True if an activity with `remote_id == iri` is already in the inbox."""
        return bool(DB.inbox.find_one({"remote_id": iri}))

    @ensure_it_is_me
    def inbox_new(self, as_actor: ap.Person, activity: ap.BaseActivity) -> None:
        """Insert `activity` into the inbox collection with fresh meta flags."""
        DB.inbox.insert_one(
            {
                "activity": activity.to_dict(),
                "type": activity.type,
                "remote_id": activity.id,
                "meta": {"undo": False, "deleted": False},
            }
        )

    @ensure_it_is_me
    def post_to_remote_inbox(self, as_actor: ap.Person, payload: str, to: str) -> None:
        """Queue the delivery of `payload` to the remote inbox `to` via `tasks`."""
        tasks.post_to_inbox.delay(payload, to)

    @ensure_it_is_me
    def new_follower(self, as_actor: ap.Person, follow: ap.Follow) -> None:
        """Record the actor of `follow` as a follower, unless already recorded."""
        remote_actor = follow.get_actor().id

        # find_one avoids Cursor.count(), which newer PyMongo releases dropped
        if DB.followers.find_one({"remote_actor": remote_actor}) is None:
            DB.followers.insert_one({"remote_actor": remote_actor})

    @ensure_it_is_me
    def undo_new_follower(self, as_actor: ap.Person, follow: ap.Follow) -> None:
        """Remove the actor of `follow` from the followers collection."""
        # TODO(tsileo): update the follow to set undo
        DB.followers.delete_one({"remote_actor": follow.get_actor().id})

    @ensure_it_is_me
    def undo_new_following(self, as_actor: ap.Person, follow: ap.Follow) -> None:
        """Remove the object of `follow` from the following collection."""
        # TODO(tsileo): update the follow to set undo
        DB.following.delete_one({"remote_actor": follow.get_object().id})

    @ensure_it_is_me
    def new_following(self, as_actor: ap.Person, follow: ap.Follow) -> None:
        """Record the object of `follow` as followed, unless already recorded."""
        remote_actor = follow.get_object().id

        # find_one avoids Cursor.count(), which newer PyMongo releases dropped
        if DB.following.find_one({"remote_actor": remote_actor}) is None:
            DB.following.insert_one({"remote_actor": remote_actor})

    @ensure_it_is_me
    def inbox_like(self, as_actor: ap.Person, like: ap.Like) -> None:
        """Increment the like counter of the liked object in the outbox."""
        obj = like.get_object()
        # Update the meta counter if the object is published by the server
        DB.outbox.update_one(
            {"activity.object.id": obj.id}, {"$inc": {"meta.count_like": 1}}
        )

    @ensure_it_is_me
    def inbox_undo_like(self, as_actor: ap.Person, like: ap.Like) -> None:
        """Decrement the like counter of the liked object in the outbox."""
        obj = like.get_object()
        # Update the meta counter if the object is published by the server
        DB.outbox.update_one(
            {"activity.object.id": obj.id}, {"$inc": {"meta.count_like": -1}}
        )

    @ensure_it_is_me
    def outbox_like(self, as_actor: ap.Person, like: ap.Like) -> None:
        """Count the like in the outbox and flag the liked inbox activity."""
        obj = like.get_object()
        # Unlikely, but an actor can like its own post
        DB.outbox.update_one(
            {"activity.object.id": obj.id}, {"$inc": {"meta.count_like": 1}}
        )

        # Keep track of the like we just performed
        DB.inbox.update_one(
            {"activity.object.id": obj.id}, {"$set": {"meta.liked": like.id}}
        )

    @ensure_it_is_me
    def outbox_undo_like(self, as_actor: ap.Person, like: ap.Like) -> None:
        """Revert `outbox_like`: decrement the counter and clear the liked flag."""
        obj = like.get_object()
        # Unlikely, but an actor can like its own post
        DB.outbox.update_one(
            {"activity.object.id": obj.id}, {"$inc": {"meta.count_like": -1}}
        )

        DB.inbox.update_one(
            {"activity.object.id": obj.id}, {"$set": {"meta.liked": False}}
        )

    @ensure_it_is_me
    def inbox_announce(self, as_actor: ap.Person, announce: ap.Announce) -> None:
        """Increment the boost counter of the announced object in the outbox.

        Announces whose object is a string not starting with "http" are
        logged and ignored.
        """
        obj_ref = announce._data["object"]
        if isinstance(obj_ref, str):
            if not obj_ref.startswith("http"):
                # TODO(tsileo): actually drop it without storing it and better logging, also move the check somewhere else
                logger.warning(
                    f'received an Annouce referencing an OStatus notice ({obj_ref}), dropping the message'
                )
                return
            # FIXME(tsileo): Save/cache the object, and make it part of the stream so we can fetch it
            obj_iri = obj_ref
        else:
            # The embedded object belongs to the announce, not to the backend
            obj_iri = announce.get_object().id

        DB.outbox.update_one(
            {"activity.object.id": obj_iri}, {"$inc": {"meta.count_boost": 1}}
        )

    @ensure_it_is_me
    def inbox_undo_announce(self, as_actor: ap.Person, announce: ap.Announce) -> None:
        """Decrement the boost counter of the announced object in the outbox."""
        obj = announce.get_object()
        # Update the meta counter if the object is published by the server
        DB.outbox.update_one(
            {"activity.object.id": obj.id}, {"$inc": {"meta.count_boost": -1}}
        )

    @ensure_it_is_me
    def outbox_announce(self, as_actor: ap.Person, announce: ap.Announce) -> None:
        """Flag the announced inbox activity as boosted with the announce id."""
        obj = announce.get_object()
        DB.inbox.update_one(
            {"activity.object.id": obj.id}, {"$set": {"meta.boosted": announce.id}}
        )

    @ensure_it_is_me
    def outbox_undo_announce(self, as_actor: ap.Person, announce: ap.Announce) -> None:
        """Clear the boosted flag of the announced inbox activity."""
        obj = announce.get_object()
        DB.inbox.update_one(
            {"activity.object.id": obj.id}, {"$set": {"meta.boosted": False}}
        )

    @ensure_it_is_me
    def inbox_delete(self, as_actor: ap.Person, delete: ap.Delete) -> None:
        """Mark the inbox activity wrapping the deleted object as deleted."""
        DB.inbox.update_one(
            {"activity.object.id": delete.get_object().id},
            {"$set": {"meta.deleted": True}},
        )
        # FIXME(tsileo): handle threads
        # obj = delete._get_actual_object()
        # if obj.type_enum == ActivityType.NOTE:
        #     obj._delete_from_threads()

        # TODO(tsileo): also purge the cache if it's a reply of a published activity

    @ensure_it_is_me
    def outbox_delete(self, as_actor: ap.Person, delete: ap.Delete) -> None:
        """Mark the outbox activity wrapping the deleted object as deleted."""
        DB.outbox.update_one(
            {"activity.object.id": delete.get_object().id},
            {"$set": {"meta.deleted": True}},
        )

    @ensure_it_is_me
    def inbox_update(self, as_actor: ap.Person, update: ap.Update) -> None:
        """Replace the stored object of an inbox activity when a Note is updated.

        Updates for any other object type are currently ignored.
        """
        obj = update.get_object()
        if obj.ACTIVITY_TYPE == ap.ActivityType.NOTE:
            DB.inbox.update_one(
                {"activity.object.id": obj.id},
                {"$set": {"activity.object": obj.to_dict()}},
            )
            return

        # FIXME(tsileo): handle update actor and inbox_update_note/inbox_update_actor

    @ensure_it_is_me
    def outbox_update(self, as_actor: ap.Person, _update: ap.Update) -> None:
        """Apply a partial update to the object of an outbox activity.

        Every key of the update's raw object except `id` and `type` is set
        on `activity.object`; keys whose value is None are unset instead.
        `activity.object.updated` is stamped with the current UTC time
        unless the update itself provides an `updated` value.
        """
        obj = _update._data["object"]

        update_prefix = "activity.object."
        to_set: Dict[str, Any] = {
            f"{update_prefix}updated": (
                datetime.utcnow().replace(microsecond=0).isoformat() + "Z"
            )
        }
        to_unset: Dict[str, Any] = {}
        for k, v in obj.items():
            if k in ("id", "type"):
                continue
            if v is None:
                to_unset[f"{update_prefix}{k}"] = ""
            else:
                to_set[f"{update_prefix}{k}"] = v

        update: Dict[str, Any] = {"$set": to_set}
        # Only include "$unset" when there is something to unset
        if to_unset:
            update["$unset"] = to_unset

        logger.info("updating note from outbox %r %s", obj, update)
        DB.outbox.update_one({"activity.object.id": obj["id"]}, update)
        # FIXME(tsileo): should send an Update (but not a partial one, to all the note's recipients
        # (create a new Update with the result of the update, and send it without saving it?)

    def outbox_create(self, as_actor: ap.Person, create: ap.Create) -> None:
        """No-op hook for Create activities posted to the outbox."""
        pass

    def inbox_create(self, as_actor: ap.Person, create: ap.Create) -> None:
        """No-op hook for Create activities received in the inbox."""
        pass
2018-05-18 13:41:41 -05:00
def gen_feed():
    """Build a `FeedGenerator` from up to 50 `Create` activities of the outbox.

    Feed-level metadata comes from `ID`, `USERNAME` and `ME`; each entry uses
    the created object's `url` as id/link and its `content` as both title
    and description.
    """
    feed = FeedGenerator()
    feed.id(f"{ID}")
    feed.title(f"{USERNAME} notes")
    feed.author({"name": USERNAME, "email": "t@a4.io"})
    feed.link(href=ID, rel="alternate")
    feed.description(f"{USERNAME} notes")
    feed.logo(ME.get("icon", {}).get("url"))
    feed.language("en")

    for doc in DB.outbox.find({"type": "Create"}, limit=50):
        entry = feed.add_entry()
        note = doc["activity"]["object"]
        note_url = note.get("url")
        entry.id(note_url)
        entry.link(href=note_url)
        entry.title(note["content"])
        entry.description(note["content"])

    return feed
def json_feed(path: str) -> Dict[str, Any]:
    """Build a JSON Feed (https://jsonfeed.org/) document from the outbox.

    Args:
        path: URL path of the feed, appended to `ID` to form `feed_url`.

    Returns:
        A JSON Feed version 1 document with up to 50 `Create` activities.
    """
    data = []
    for item in DB.outbox.find({"type": "Create"}, limit=50):
        activity = item["activity"]
        note = activity["object"]
        data.append(
            {
                # Outbox documents written by this module carry no top-level
                # "id" key; fall back to the activity's own id (as the inbox
                # feed does) instead of raising KeyError.
                "id": item["id"] if "id" in item else activity["id"],
                "url": note.get("url"),
                "content_html": note["content"],
                "content_text": html2text(note["content"]),
                "date_published": note.get("published"),
            }
        )

    return {
        "version": "https://jsonfeed.org/version/1",
        "user_comment": (
            "This is a microblog feed. You can add this to your feed reader using the following URL: "
            + ID
            + path
        ),
        "title": USERNAME,
        "home_page_url": ID,
        "feed_url": ID + path,
        "author": {
            "name": USERNAME,
            "url": ID,
            "avatar": ME.get("icon", {}).get("url"),
        },
        "items": data,
    }
2018-06-16 14:24:53 -05:00
def build_inbox_json_feed(
    path: str, request_cursor: Optional[str] = None
) -> Dict[str, Any]:
    """Build a paginated JSON Feed document from the non-deleted inbox notes.

    Args:
        path: URL path of the feed, appended to `ID` to form `feed_url`.
        request_cursor: String form of the `_id` of the last item of the
            previous page; only older items are returned when given.

    Returns:
        A JSON Feed version 1 document with up to 50 items, newest first.
        `next_url` is included only when the page is full.
    """
    data = []
    cursor = None

    q: Dict[str, Any] = {"type": "Create", "meta.deleted": False}
    if request_cursor:
        # `_id` values are ObjectIds: comparing them against the raw cursor
        # string would never match, so convert it first.
        q["_id"] = {"$lt": ObjectId(request_cursor)}

    for item in DB.inbox.find(q, limit=50).sort("_id", -1):
        activity = item["activity"]
        note = activity["object"]
        actor = ap.get_backend().fetch_iri(activity["actor"])
        data.append(
            {
                "id": activity["id"],
                "url": note.get("url"),
                "content_html": note["content"],
                "content_text": html2text(note["content"]),
                "date_published": note.get("published"),
                "author": {
                    "name": actor.get("name", actor.get("preferredUsername")),
                    "url": actor.get("url"),
                    "avatar": actor.get("icon", {}).get("url"),
                },
            }
        )
        # Remember the last `_id` seen; it becomes the next page's cursor
        cursor = str(item["_id"])

    resp = {
        "version": "https://jsonfeed.org/version/1",
        "title": f"{USERNAME}'s stream",
        "home_page_url": ID,
        "feed_url": ID + path,
        "items": data,
    }
    # A full page suggests there may be more items to fetch
    if cursor and len(data) == 50:
        resp["next_url"] = ID + path + "?cursor=" + cursor

    return resp
2018-06-16 14:24:53 -05:00
def parse_collection(
    payload: Optional[Dict[str, Any]] = None, url: Optional[str] = None
) -> List[str]:
    """Resolve/fetch a `Collection`/`OrderedCollection`.

    The local followers/following collections are read straight from
    MongoDB; everything else is delegated to `ap_parse_collection`.
    """
    if url == ID + "/followers":
        local_docs = DB.followers.find()
    elif url == ID + "/following":
        local_docs = DB.following.find()
    else:
        # Not an internal collection: go through all the pages
        return ap_parse_collection(payload, url)

    actors = []
    for doc in local_docs:
        actors.append(doc["remote_actor"])
    return actors
2018-05-18 13:41:41 -05:00
def embed_collection(total_items, first_page_id):
    """Return a minimal `OrderedCollection` stub suitable for embedding.

    `first_page_id` is used as the collection id, and its first page is
    addressed by appending `?page=first` to it.
    """
    collection = {}
    collection["type"] = ap.ActivityType.ORDERED_COLLECTION.value
    collection["totalItems"] = total_items
    collection["first"] = f"{first_page_id}?page=first"
    collection["id"] = first_page_id
    return collection
2018-06-16 14:24:53 -05:00
def build_ordered_collection(
    col, q=None, cursor=None, map_func=None, limit=50, col_name=None, first_page=False
):
    """Build an `OrderedCollection` or `OrderedCollectionPage` from a MongoDB collection.

    Args:
        col: MongoDB collection to read from.
        q: Optional query filter; it is copied, never mutated.
        cursor: String form of an `_id`; when given, only documents with a
            smaller `_id` are returned and the result is a page.
        map_func: Optional callable applied to every document (after its
            `_id` has been removed).
        limit: Maximum number of items per page.
        col_name: Name used to build the collection URL (defaults to
            `col.name`).
        first_page: When True and no cursor is given, return the embedded
            first page instead of the whole collection.

    Returns:
        A dict describing the empty collection, the collection with its
        first page embedded, or a single page.
    """
    col_name = col_name or col.name
    collection_id = BASE_URL + "/" + col_name

    # Work on a copy so the `_id` filter never leaks into the caller's dict
    query = dict(q) if q else {}
    if cursor:
        query["_id"] = {"$lt": ObjectId(cursor)}
    data = list(col.find(query, limit=limit).sort("_id", -1))

    if not data:
        return {
            "id": collection_id,
            "totalItems": 0,
            "type": ap.ActivityType.ORDERED_COLLECTION.value,
            "orderedItems": [],
        }

    start_cursor = str(data[0]["_id"])
    next_page_cursor = str(data[-1]["_id"])
    # NOTE(review): Cursor.count() is deprecated since PyMongo 3.7 and removed
    # in 4.0; switch to count_documents() once the pinned driver allows it.
    # As in the original, the count uses the cursor-filtered query.
    total_items = col.find(query).count()

    data = [_remove_id(doc) for doc in data]
    if map_func:
        data = [map_func(doc) for doc in data]

    has_next = len(data) == limit
    next_url = collection_id + "?cursor=" + next_page_cursor

    # No cursor, this is the first page and we return an OrderedCollection
    if not cursor:
        resp = {
            "@context": ap.COLLECTION_CTX,
            "id": collection_id,
            "totalItems": total_items,
            "type": ap.ActivityType.ORDERED_COLLECTION.value,
            "first": {
                "id": collection_id + "?cursor=" + start_cursor,
                "orderedItems": data,
                "partOf": collection_id,
                "totalItems": total_items,
                "type": ap.ActivityType.ORDERED_COLLECTION_PAGE.value,
            },
        }
        if has_next:
            resp["first"]["next"] = next_url

        if first_page:
            return resp["first"]

        return resp

    # If there's a cursor, then we return an OrderedCollectionPage
    resp = {
        "@context": ap.COLLECTION_CTX,
        "type": ap.ActivityType.ORDERED_COLLECTION_PAGE.value,
        "id": collection_id + "?cursor=" + start_cursor,
        "totalItems": total_items,
        "partOf": collection_id,
        "orderedItems": data,
    }
    if has_next:
        resp["next"] = next_url

    # The response is already a page here, so `first_page` needs no special
    # handling (looking up resp["first"] would raise KeyError).

    # XXX(tsileo): implements prev with prev=<first item cursor>?
    return resp