Improve actor caching

This commit is contained in:
Thomas Sileo 2019-08-10 11:47:50 +02:00
parent 4bf91ffa80
commit 4433a552c0
3 changed files with 52 additions and 28 deletions

View file

@ -17,11 +17,16 @@ import config
from config import DB from config import DB
from core import gc from core import gc
from core.activitypub import Box from core.activitypub import Box
from core.activitypub import _actor_hash
from core.activitypub import _add_answers_to_question from core.activitypub import _add_answers_to_question
from core.activitypub import no_cache
from core.activitypub import post_to_outbox from core.activitypub import post_to_outbox
from core.db import update_one_activity
from core.inbox import process_inbox from core.inbox import process_inbox
from core.meta import MetaKey from core.meta import MetaKey
from core.meta import _meta from core.meta import by_remote_id
from core.meta import flag
from core.meta import upsert
from core.notifications import set_inbox_flags from core.notifications import set_inbox_flags
from core.outbox import process_outbox from core.outbox import process_outbox
from core.remote import track_failed_send from core.remote import track_failed_send
@ -135,16 +140,19 @@ def task_cache_object() -> _Response:
activity = ap.fetch_remote_activity(iri) activity = ap.fetch_remote_activity(iri)
app.logger.info(f"activity={activity!r}") app.logger.info(f"activity={activity!r}")
obj = activity.get_object() obj = activity.get_object()
DB.activities.update_one( obj_actor = obj.get_actor()
{"remote_id": activity.id}, obj_actor_hash = _actor_hash(obj_actor)
{
"$set": { cache = {MetaKey.OBJECT: obj.to_dict(embed=True)}
"meta.object": obj.to_dict(embed=True),
# FIXME(tsileo): set object actor only if different from actor? if activity.get_actor().id != obj_actor.id:
"meta.object_actor": obj.get_actor().to_dict(embed=True), cache[MetaKey.OBJECT_ACTOR] = obj_actor.to_dict(embed=True)
} cache[MetaKey.OBJECT_ACTOR_ID] = obj_actor.id
}, cache[MetaKey.OBJECT_ACTOR_HASH] = obj_actor_hash
)
# FIXME(tsileo): set OBJECT_ACTOR_HASH (like in "cache actor" and do an update_many even for ACTOR (not only
# OBJECT_ACTOR) ; a migration for OBJECT_ACTOR_ID/OBJECT_ACTOR_HASH needed?
update_one_activity(by_remote_id(activity.id), upsert(cache))
except (ActivityGoneError, ActivityNotFoundError, NotAnActivityError): except (ActivityGoneError, ActivityNotFoundError, NotAnActivityError):
DB.activities.update_one({"remote_id": iri}, {"$set": {"meta.deleted": True}}) DB.activities.update_one({"remote_id": iri}, {"$set": {"meta.deleted": True}})
app.logger.exception(f"flagging activity {iri} as deleted, no object caching") app.logger.exception(f"flagging activity {iri} as deleted, no object caching")
@ -243,11 +251,16 @@ def task_cache_actor() -> _Response:
activity = ap.fetch_remote_activity(iri) activity = ap.fetch_remote_activity(iri)
app.logger.info(f"activity={activity!r}") app.logger.info(f"activity={activity!r}")
# Reload the actor without caching (in case it got upated)
with no_cache():
actor = ap.fetch_remote_activity(activity.get_actor().id)
actor_hash = _actor_hash(actor)
# Fetch the Open Grah metadata if it's a `Create` # Fetch the Open Grah metadata if it's a `Create`
if activity.has_type(ap.ActivityType.CREATE): if activity.has_type(ap.ActivityType.CREATE):
Tasks.fetch_og_meta(iri) Tasks.fetch_og_meta(iri)
actor = activity.get_actor()
if actor.icon: if actor.icon:
if isinstance(actor.icon, dict) and "url" in actor.icon: if isinstance(actor.icon, dict) and "url" in actor.icon:
config.MEDIA_CACHE.cache_actor_icon(actor.icon["url"]) config.MEDIA_CACHE.cache_actor_icon(actor.icon["url"])
@ -258,19 +271,24 @@ def task_cache_actor() -> _Response:
if actor.id == config.ID: if actor.id == config.ID:
# It's a new following, cache the "object" (which is the actor we follow) # It's a new following, cache the "object" (which is the actor we follow)
DB.activities.update_one( DB.activities.update_one(
{"remote_id": iri}, by_remote_id(iri),
{ upsert({MetaKey.OBJECT: activity.get_object().to_dict(embed=True)}),
"$set": {
"meta.object": activity.get_object().to_dict(embed=True)
}
},
) )
# Cache the actor info # Cache the actor info
DB.activities.update_one( DB.activities.update_many(
{"remote_id": iri}, {"$set": {"meta.actor": actor.to_dict(embed=True)}} {
**flag(MetaKey.ACTOR_ID, actor.id),
**flag(MetaKey.ACTOR_HASH, {"$ne": actor_hash}),
},
upsert({MetaKey.ACTOR: actor.to_dict(embed=True)}),
) )
# TODO(tsileo): Also update following (it's in the object)
# DB.activities.update_many(
# {"meta.object_id": actor.id}, {"$set": {"meta.object": actor.to_dict(embed=True)}}
# )
app.logger.info(f"actor cached for {iri}") app.logger.info(f"actor cached for {iri}")
if activity.has_type([ap.ActivityType.CREATE, ap.ActivityType.ANNOUNCE]): if activity.has_type([ap.ActivityType.CREATE, ap.ActivityType.ANNOUNCE]):
Tasks.cache_attachments(iri) Tasks.cache_attachments(iri)
@ -435,9 +453,9 @@ def task_process_new_activity() -> _Response:
flags = {} flags = {}
if not activity.published: if not activity.published:
flags[_meta(MetaKey.PUBLISHED)] = now() flags.update(flag(MetaKey.PUBLISHED, now()))
else: else:
flags[_meta(MetaKey.PUBLISHED)] = activity.published flags.update(flag(MetaKey.PUBLISHED, activity.published))
set_inbox_flags(activity, flags) set_inbox_flags(activity, flags)
app.logger.info(f"a={activity}, flags={flags!r}") app.logger.info(f"a={activity}, flags={flags!r}")

View file

@ -7,9 +7,9 @@ from datetime import datetime
from datetime import timezone from datetime import timezone
from typing import Any from typing import Any
from typing import Dict from typing import Dict
from typing import Iterator
from typing import List from typing import List
from typing import Optional from typing import Optional
from typing import Iterator
from urllib.parse import urljoin from urllib.parse import urljoin
from urllib.parse import urlparse from urllib.parse import urlparse
@ -311,21 +311,19 @@ class MicroblogPubBackend(Backend):
if data["meta"]["deleted"]: if data["meta"]["deleted"]:
raise ActivityGoneError(f"{iri} is gone") raise ActivityGoneError(f"{iri} is gone")
return data["activity"] return data["activity"]
# Check if we're looking for an object wrapped in a Create
obj = DB.activities.find_one({"meta.object_id": iri, "type": "Create"}) obj = DB.activities.find_one({"meta.object_id": iri, "type": "Create"})
if obj: if obj:
if obj["meta"]["deleted"]: if obj["meta"]["deleted"]:
raise ActivityGoneError(f"{iri} is gone") raise ActivityGoneError(f"{iri} is gone")
return obj["meta"].get("object") or obj["activity"]["object"] return obj["meta"].get("object") or obj["activity"]["object"]
# TODO(tsileo): also check the REPLIES box
# Check if it's cached because it's a follower # Check if it's cached because it's a follower
# Remove extra info (like the key hash if any) # Remove extra info (like the key hash if any)
cleaned_iri = iri.split("#")[0] cleaned_iri = iri.split("#")[0]
actor = DB.activities.find_one( actor = DB.activities.find_one(
{ {"meta.actor_id": cleaned_iri, "meta.actor": {"$exists": True}}
"meta.actor_id": cleaned_iri,
"type": ap.ActivityType.FOLLOW.value,
"meta.undo": False,
}
) )
# "type" check is here to skip old metadata for "old/buggy" followers # "type" check is here to skip old metadata for "old/buggy" followers

View file

@ -23,12 +23,16 @@ class MetaKey(Enum):
POLL_ANSWER = "poll_answer" POLL_ANSWER = "poll_answer"
STREAM = "stream" STREAM = "stream"
ACTOR_ID = "actor_id" ACTOR_ID = "actor_id"
ACTOR = "actor"
ACTOR_HASH = "actor_hash"
UNDO = "undo" UNDO = "undo"
PUBLISHED = "published" PUBLISHED = "published"
GC_KEEP = "gc_keep" GC_KEEP = "gc_keep"
OBJECT = "object" OBJECT = "object"
OBJECT_ID = "object_id" OBJECT_ID = "object_id"
OBJECT_ACTOR = "object_actor" OBJECT_ACTOR = "object_actor"
OBJECT_ACTOR_ID = "object_actor_id"
OBJECT_ACTOR_HASH = "object_actor_hash"
PUBLIC = "public" PUBLIC = "public"
DELETED = "deleted" DELETED = "deleted"
@ -86,3 +90,7 @@ def upsert(data: Dict[MetaKey, Any]) -> _SubQuery:
sq[_meta(mk)] = val sq[_meta(mk)] = val
return {"$set": sq} return {"$set": sq}
def flag(mk: MetaKey, val: Any) -> _SubQuery:
return {_meta(mk): val}