From 52bc600832ad11302c9b486473be2eff8a4cc41f Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Sun, 1 Sep 2019 20:58:51 +0200 Subject: [PATCH] Index hashtags and mentions --- app.py | 29 +++++++++++++++-------------- core/activitypub.py | 15 +++++++++++++-- core/indexes.py | 2 ++ core/meta.py | 11 +++++++++++ core/migrations.py | 25 +++++++++++++++++++++++++ 5 files changed, 66 insertions(+), 16 deletions(-) diff --git a/app.py b/app.py index e3e17a3..2940d42 100644 --- a/app.py +++ b/app.py @@ -55,10 +55,13 @@ from core.db import find_one_activity from core.meta import Box from core.meta import MetaKey from core.meta import _meta +from core.meta import by_hashtag from core.meta import by_remote_id from core.meta import by_type +from core.meta import by_visibility from core.meta import in_outbox from core.meta import is_public +from core.meta import not_deleted from core.meta import not_undo from core.shared import _build_thread from core.shared import _get_ip @@ -875,9 +878,10 @@ def following(): def tags(tag): if not DB.activities.count( { - "box": Box.OUTBOX.value, - "activity.object.tag.type": "Hashtag", - "activity.object.tag.name": "#" + tag, + **in_outbox(), + **by_hashtag(tag), + **by_visibility(ap.Visibility.PUBLIC), + **not_deleted(), } ): abort(404) @@ -888,23 +892,20 @@ def tags(tag): tag=tag, outbox_data=DB.activities.find( { - "box": Box.OUTBOX.value, - "type": ActivityType.CREATE.value, - "meta.deleted": False, - "activity.object.tag.type": "Hashtag", - "activity.object.tag.name": "#" + tag, + **in_outbox(), + **by_hashtag(tag), + **by_visibility(ap.Visibility.PUBLIC), + **not_deleted(), } ), ) ) _log_sig() q = { - "box": Box.OUTBOX.value, - "meta.deleted": False, - "meta.undo": False, - "type": ActivityType.CREATE.value, - "activity.object.tag.type": "Hashtag", - "activity.object.tag.name": "#" + tag, + **in_outbox(), + **by_hashtag(tag), + **by_visibility(ap.Visibility.PUBLIC), + **not_deleted(), } return activitypubify( **activitypub.build_ordered_collection( diff --git a/core/activitypub.py b/core/activitypub.py index 65e488c..4522484 100644 --- a/core/activitypub.py +++ b/core/activitypub.py @@ -128,9 +128,20 @@ def save(box: Box, activity: ap.BaseActivity) -> None: actor_id = activity.get_actor().id # Set some "type"-related neta - extra = {} - if box == Box.OUTBOX and activity.has_type(ap.Follow): + extra: Dict[str, Any] = {} + if box == Box.OUTBOX and activity.has_type(ap.ActivityType.FOLLOW): extra[MetaKey.FOLLOW_STATUS.value] = FollowStatus.WAITING.value + elif activity.has_type(ap.ActivityType.CREATE): + mentions = [] + obj = activity.get_object() + for m in obj.get_mentions(): + mentions.append(m.href) + hashtags = [] + for h in obj.get_hashtags(): + hashtags.append(h.name[1:]) # Strip the # + extra.update( + {MetaKey.MENTIONS.value: mentions, MetaKey.HASHTAGS.value: hashtags} + ) DB.activities.insert_one( { diff --git a/core/indexes.py b/core/indexes.py index a78ba02..9df3485 100644 --- a/core/indexes.py +++ b/core/indexes.py @@ -26,6 +26,8 @@ def create_indexes(): DB.activities.create_index([("remote_id", pymongo.ASCENDING)]) DB.activities.create_index([("meta.actor_id", pymongo.ASCENDING)]) DB.activities.create_index([("meta.object_id", pymongo.ASCENDING)]) + DB.activities.create_index([("meta.mentions", pymongo.ASCENDING)]) + DB.activities.create_index([("meta.hashtags", pymongo.ASCENDING)]) DB.activities.create_index([("meta.thread_root_parent", pymongo.ASCENDING)]) DB.activities.create_index( [ diff --git a/core/meta.py b/core/meta.py index 5e5a750..519ebbf 100644 --- a/core/meta.py +++ b/core/meta.py @@ -46,6 +46,9 @@ class MetaKey(Enum): OBJECT_ACTOR_HASH = "object_actor_hash" PUBLIC = "public" + HASHTAGS = "hashtags" + MENTIONS = "mentions" + FOLLOW_STATUS = "follow_status" THREAD_ROOT_PARENT = "thread_root_parent" @@ -121,6 +124,14 @@ def is_public() -> _SubQuery: return flag(MetaKey.PUBLIC, True) +def by_visibility(vis: ap.Visibility) -> _SubQuery: + return flag(MetaKey.VISIBILITY, vis.name) + + +def by_hashtag(ht: str) -> _SubQuery: + return flag(MetaKey.HASHTAGS, ht) + + def inc(mk: MetaKey, val: int) -> _SubQuery: return {"$inc": flag(mk, val)} diff --git a/core/migrations.py b/core/migrations.py index 8d7bcd4..db39438 100644 --- a/core/migrations.py +++ b/core/migrations.py @@ -19,6 +19,7 @@ from core.meta import by_remote_id from core.meta import by_type from core.meta import in_inbox from core.meta import in_outbox +from core.meta import not_deleted from core.meta import not_undo from core.meta import upsert from utils.migrations import Migration @@ -293,3 +294,27 @@ class _20190901_FollowFollowBackMigrationFix(Migration): except Exception: logger.exception(f"failed to process activity {data!r}") + + +class _20190901_MetaHashtagsAndMentions(Migration): + def migrate(self) -> None: + for data in find_activities( + {**by_type(ap.ActivityType.CREATE), **not_deleted()} + ): + try: + activity = ap.parse_activity(data["activity"]) + mentions = [] + obj = activity.get_object() + for m in obj.get_mentions(): + mentions.append(m.href) + hashtags = [] + for h in obj.get_hashtags(): + hashtags.append(h.name[1:]) # Strip the # + + update_one_activity( + by_remote_id(data["remote_id"]), + upsert({MetaKey.MENTIONS: mentions, MetaKey.HASHTAGS: hashtags}), + ) + + except Exception: + logger.exception(f"failed to process activity {data!r}")