101 lines
2.9 KiB
Python
101 lines
2.9 KiB
Python
|
import logging
|
||
|
from datetime import datetime
|
||
|
from datetime import timedelta
|
||
|
from typing import Any
|
||
|
from typing import Dict
|
||
|
from typing import List
|
||
|
from urllib.parse import urlparse
|
||
|
|
||
|
from little_boxes import activitypub as ap
|
||
|
|
||
|
import activitypub
|
||
|
from activitypub import Box
|
||
|
from config import ID
|
||
|
from config import ME
|
||
|
from config import MEDIA_CACHE
|
||
|
from utils.migrations import DB
|
||
|
from utils.migrations import Migration
|
||
|
from utils.migrations import logger
|
||
|
|
||
|
# Create the microblog.pub backend and hand it to little_boxes through
# `use_backend`, so the `ap` helpers used below go through it.
back = activitypub.MicroblogPubBackend()
ap.use_backend(back)

# Person object built from the `ME` document exported by `config`.
MY_PERSON = ap.Person(**ME)

# Module-level logger. NOTE: this rebinds the `logger` name that was imported
# from `utils.migrations` at the top of the file.
logger = logging.getLogger(__name__)
|
||
|
|
||
|
|
||
|
def threads_of_interest() -> List[str]:
    """Collect the IDs of the threads and objects that must be kept.

    Two sources are combined:

    * the ``meta.thread_root_parent`` of every outbox ``Create`` activity that
      has one (threads the local actor took part in);
    * for every activity flagged ``meta.bookmarked``, its ``meta.object_id``
      and, when present, its ``meta.thread_root_parent``.

    Returns:
        The de-duplicated IDs as a list (built from a set, so the order
        carries no meaning).
    """
    # Threads the local actor has posted in.
    participated_query = {
        "meta.thread_root_parent": {"$exists": True},
        "box": Box.OUTBOX.value,
        "type": ap.ActivityType.CREATE.value,
    }
    interesting = {
        doc["meta"]["thread_root_parent"]
        for doc in DB.activities.find(participated_query)
    }

    # Threads attached to bookmarked activities.
    for doc in DB.activities.find({"meta.bookmarked": True}):
        meta = doc["meta"]
        # The bookmarked object itself, so that its replies are kept.
        interesting.add(meta["object_id"])
        # And the root of its thread, when it belongs to one.
        if "thread_root_parent" in meta:
            interesting.add(meta["thread_root_parent"])

    return list(interesting)
|
||
|
|
||
|
|
||
|
def perform() -> None:
    """Delete old inbox ``Create`` activities that are not worth keeping.

    Considers at most 1000 inbox ``Create`` activities whose
    ``activity.published`` value sorts before the ``YYYY-MM-DD`` date of two
    days ago (UTC). Each candidate is deleted, together with the media cache
    entries stored under its ``remote_id``, unless one of these holds:

    * it is bookmarked (``meta.bookmarked``);
    * its object mentions ``ID``;
    * its object is a direct reply to something whose ID starts with ``ID``;
    * it is a reply whose ``meta.thread_root_parent`` starts with ``ID`` or is
      one of the IDs returned by ``threads_of_interest()``;
    * it was boosted or liked (``meta.boosted`` / ``meta.liked``).
    """
    # NOTE(review): the cutoff is compared against `activity.published` as a
    # string; presumably that field holds an ISO-8601 timestamp, which sorts
    # lexicographically -- confirm.
    cutoff = (datetime.utcnow() - timedelta(days=2)).strftime("%Y-%m-%d")

    # Build the lookup set once: it is probed for every candidate activity and
    # a set makes each membership test O(1) instead of scanning a list.
    toi = set(threads_of_interest())

    # NOTE(review): kept activities are not marked in any way, so they will
    # match this query again on the next run and count towards the limit.
    old_creates = DB.activities.find(
        {
            "box": Box.INBOX.value,
            "type": ap.ActivityType.CREATE.value,
            "activity.published": {"$lt": cutoff},
        }
    ).limit(1000)

    # Go over the old Create activities
    for data in old_creates:
        remote_id = data["remote_id"]
        meta = data["meta"]
        activity = ap.parse_activity(data["activity"])
        # Lazy %-formatting: the activity is only rendered if INFO is enabled.
        logger.info("%s", activity)

        # This activity has been bookmarked, keep it
        if meta.get("bookmarked"):
            continue

        # Inspect the object
        obj = activity.get_object()

        # This activity mentions the server actor, keep it
        if obj.has_mention(ID):
            continue

        # This activity is a direct reply to one of the server actor's
        # activities, keep it
        in_reply_to = obj.get_in_reply_to()
        if in_reply_to and in_reply_to.startswith(ID):
            continue

        # This activity is part of a thread we want to keep, keep it
        if in_reply_to and meta.get("thread_root_parent"):
            thread_root_parent = meta["thread_root_parent"]
            if thread_root_parent.startswith(ID) or thread_root_parent in toi:
                continue

        # This activity was boosted or liked, keep it
        if meta.get("boosted") or meta.get("liked"):
            continue

        # Delete the cached attachments first, then the activity itself
        for grid_item in MEDIA_CACHE.fs.find({"remote_id": remote_id}):
            MEDIA_CACHE.fs.delete(grid_item._id)

        DB.activities.delete_one({"_id": data["_id"]})
|