This commit is contained in:
Thomas Sileo 2019-07-21 21:47:52 +02:00
parent 2180a79cf2
commit 48618c9694
2 changed files with 49 additions and 40 deletions

View file

@ -226,6 +226,7 @@ ME = {
"publicKey": KEY.to_dict(), "publicKey": KEY.to_dict(),
} }
# Default emojis, space-separated, update `me.yml` to customize emojis
EMOJIS = "😺 😸 😹 😻 😼 😽 🙀 😿 😾" EMOJIS = "😺 😸 😹 😻 😼 😽 🙀 😿 😾"
if conf.get("emojis"): if conf.get("emojis"):
EMOJIS = conf["emojis"] EMOJIS = conf["emojis"]
@ -235,5 +236,8 @@ EMOJI_TPL = '<img src="https://cdn.jsdelivr.net/npm/twemoji@12.0.0/2/svg/{filena
if conf.get("emoji_tpl"): if conf.get("emoji_tpl"):
EMOJI_TPL = conf["emoji_tpl"] EMOJI_TPL = conf["emoji_tpl"]
# Host blacklist # Hosts blacklist
BLACKLIST = conf.get("blacklist", []) BLACKLIST = conf.get("blacklist", [])
# By default, we keep 14 days of inbox data; outbox is kept forever (along with bookmarked stuff, outbox replies, liked...)
DAYS_TO_KEEP = 14

83
gc.py
View file

@ -1,10 +1,7 @@
import logging import logging
from datetime import datetime from datetime import datetime
from datetime import timedelta from datetime import timedelta
from typing import Any
from typing import Dict
from typing import List from typing import List
from urllib.parse import urlparse
from little_boxes import activitypub as ap from little_boxes import activitypub as ap
@ -13,9 +10,8 @@ from activitypub import Box
from config import ID from config import ID
from config import ME from config import ME
from config import MEDIA_CACHE from config import MEDIA_CACHE
from config import DAYS_TO_KEEP
from utils.migrations import DB from utils.migrations import DB
from utils.migrations import Migration
from utils.migrations import logger
back = activitypub.MicroblogPubBackend() back = activitypub.MicroblogPubBackend()
ap.use_backend(back) ap.use_backend(back)
@ -50,8 +46,9 @@ def threads_of_interest() -> List[str]:
def perform() -> None: def perform() -> None:
d = (datetime.utcnow() - timedelta(days=2)).strftime("%Y-%m-%d") d = (datetime.utcnow() - timedelta(days=DAYS_TO_KEEP)).strftime("%Y-%m-%d")
toi = threads_of_interest() toi = threads_of_interest()
logger.info(f"thread_of_interest={toi!r}")
# Go over the old Create activities # Go over the old Create activities
for data in DB.activities.find( for data in DB.activities.find(
@ -60,41 +57,49 @@ def perform() -> None:
"type": ap.ActivityType.CREATE.value, "type": ap.ActivityType.CREATE.value,
"activity.published": {"$lt": d}, "activity.published": {"$lt": d},
} }
).limit(1000): ):
remote_id = data["remote_id"] try:
meta = data["meta"] remote_id = data["remote_id"]
activity = ap.parse_activity(data["activity"]) meta = data["meta"]
logger.info(f"{activity}") activity = ap.parse_activity(data["activity"])
logger.info(f"activity={activity!r}")
# This activity has been bookmarked, keep it # This activity has been bookmarked, keep it
if meta.get("bookmarked"): if meta.get("bookmarked"):
continue
# Inspect the object
obj = activity.get_object()
# This activity mentions the server actor, keep it
if obj.has_mention(ID):
continue
# This activity is a direct reply to one of the server actor's activities, keep it
in_reply_to = obj.get_in_reply_to()
if in_reply_to and in_reply_to.startswith(ID):
continue
# This activity is part of a thread we want to keep, keep it
if in_reply_to and meta.get("thread_root_parent"):
thread_root_parent = meta["thread_root_parent"]
if thread_root_parent.startswith(ID) or thread_root_parent in toi:
continue continue
# This activity was boosted or liked, keep it # Inspect the object
if meta.get("boosted") or meta.get("liked"): obj = activity.get_object()
continue
# Delete the cached attachment # This activity mentions the server actor, keep it
for grid_item in MEDIA_CACHE.fs.find({"remote_id": remote_id}): if obj.has_mention(ID):
MEDIA_CACHE.fs.delete(grid_item._id) continue
# Delete the activity # This activity is a direct reply to one of the server actor's activities, keep it
DB.activities.delete_one({"_id": data["_id"]}) in_reply_to = obj.get_in_reply_to()
if in_reply_to and in_reply_to.startswith(ID):
continue
# This activity is part of a thread we want to keep, keep it
if in_reply_to and meta.get("thread_root_parent"):
thread_root_parent = meta["thread_root_parent"]
if thread_root_parent.startswith(ID) or thread_root_parent in toi:
continue
# This activity was boosted or liked, keep it
if meta.get("boosted") or meta.get("liked"):
continue
# TODO(tsileo): remove after tests
if meta.get("keep"):
logger.warning(f"{activity!r} would not have been deleted, skipping for now")
continue
# Delete the cached attachment
for grid_item in MEDIA_CACHE.fs.find({"remote_id": remote_id}):
MEDIA_CACHE.fs.delete(grid_item._id)
# Delete the activity
DB.activities.delete_one({"_id": data["_id"]})
except Exception:
logger.exception(f"failed to process {data!r}")