microblog.pub/utils/actor_service.py

95 lines
3 KiB
Python
Raw Normal View History

2018-05-18 13:41:41 -05:00
import logging
2018-06-16 15:02:10 -05:00
from urllib.parse import urlparse
2018-05-18 13:41:41 -05:00
import requests
from Crypto.PublicKey import RSA
2018-05-29 14:36:05 -05:00
from .errors import ActivityNotFoundError
2018-06-16 15:02:10 -05:00
from .urlutils import check_url
2018-05-25 17:03:30 -05:00
2018-05-18 13:41:41 -05:00
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class NotAnActorError(Exception):
    """Signal that an object could not be treated as an actor.

    The value given to the constructor is kept on ``activity`` so that
    handlers can inspect it; raisers may pass ``None`` when there is nothing
    useful to attach.
    """

    def __init__(self, activity):
        # Forwarding to Exception keeps ``args`` equal to ``(activity,)``.
        super().__init__(activity)
        self.activity = activity
class ActorService(object):
    """Fetch remote actor documents and cache them.

    Lookups go through two cache layers: an in-process dict and the ``col``
    collection, whose documents have the shape
    ``{"actor_id": <actor URL>, "cached_response": <actor document>}``.
    The origin (scheme + host) of every actor URL that is looked up is also
    recorded once in the ``instances`` collection as
    ``{"instance": <origin>, "first_object": <actor URL>}``.

    NOTE(review): ``col`` and ``instances`` are used through ``find_one``,
    ``insert`` and ``update``; if they are PyMongo collections, the last two
    are the legacy method names -- confirm before upgrading the driver.
    """

    # Seconds to wait on a remote server before a fetch is abandoned, so a
    # stalled peer cannot block the caller forever.
    _FETCH_TIMEOUT = 15

    def __init__(self, user_agent, col, actor_id, actor_data, instances):
        """Set up the caches, pre-seeding memory with the local actor.

        Args:
            user_agent: value sent as the ``User-Agent`` header on fetches.
            col: collection holding cached actor documents.
            actor_id: URL of the local actor.
            actor_data: document of the local actor, served from memory.
            instances: collection recording the remote origins seen so far.
        """
        logger.debug(f"Initializing ActorService user_agent={user_agent}")
        self._user_agent = user_agent
        self._col = col
        # Remembered so that a cache reload never tries to fetch the local
        # actor over HTTP; its data was supplied directly by the caller.
        self._local_actor_id = actor_id
        self._in_mem = {actor_id: actor_data}
        self._instances = instances
        self._known_instances = set()

    def _fetch(self, actor_url):
        """Download ``actor_url`` and return the decoded JSON body.

        The URL is first passed to ``check_url`` (project helper; its exact
        checks are not visible here).

        Raises:
            ActivityNotFoundError: the server answered with a 404.
            requests.HTTPError: any other 4xx/5xx status.
            requests.Timeout: no answer within ``_FETCH_TIMEOUT`` seconds.
        """
        logger.debug(f"fetching remote object {actor_url}")
        check_url(actor_url)
        resp = requests.get(
            actor_url,
            headers={
                "Accept": "application/activity+json",
                "User-Agent": self._user_agent,
            },
            timeout=self._FETCH_TIMEOUT,
        )
        if resp.status_code == 404:
            raise ActivityNotFoundError(
                f"{actor_url} cannot be fetched, 404 not found error"
            )

        resp.raise_for_status()
        return resp.json()

    @staticmethod
    def _ensure_actor(actor):
        """Raise ``NotAnActorError`` unless ``actor`` is a "Person" object."""
        # A payload that is not a JSON object cannot carry a "type" key.
        if not isinstance(actor, dict) or "type" not in actor:
            raise NotAnActorError(None)
        if actor["type"] != "Person":
            raise NotAnActorError(actor)

    def _register_instance(self, actor_url):
        """Record the origin of ``actor_url`` the first time it is seen."""
        instance = (
            urlparse(actor_url)._replace(path="", query="", fragment="").geturl()
        )
        if instance in self._known_instances:
            return
        self._known_instances.add(instance)
        if not self._instances.find_one({"instance": instance}):
            self._instances.insert(
                {"instance": instance, "first_object": actor_url}
            )

    def _fetch_and_cache(self, actor_url):
        """Fetch ``actor_url``, validate it, and store it in both caches."""
        actor = self._fetch(actor_url)
        # Validate before storing so a non-actor never ends up cached,
        # whichever path triggered the fetch.
        self._ensure_actor(actor)
        self._col.update(
            {"actor_id": actor_url},
            {"$set": {"cached_response": actor}},
            upsert=True,
        )
        self._in_mem[actor_url] = actor
        return actor

    def get(self, actor_url, reload_cache=False):
        """Return the actor document for ``actor_url``.

        Without ``reload_cache`` the in-memory cache is tried first, then
        the ``col`` collection, and only then the network. With
        ``reload_cache=True`` both caches are bypassed and refreshed from
        the network; the local actor is the one exception and is always
        served from memory.

        Raises:
            ActivityNotFoundError: the remote server answered with a 404.
            NotAnActorError: the fetched object has no ``type`` or its type
                is not ``"Person"``.
            requests.RequestException: any other failure while fetching.
        """
        logger.info(f"get actor {actor_url} (reload_cache={reload_cache})")

        must_refetch = reload_cache and actor_url != self._local_actor_id
        if not must_refetch and actor_url in self._in_mem:
            return self._in_mem[actor_url]

        self._register_instance(actor_url)

        if not reload_cache:
            cached_actor = self._col.find_one({"actor_id": actor_url})
            if cached_actor:
                return cached_actor["cached_response"]

        return self._fetch_and_cache(actor_url)

    def get_public_key(self, actor_url, reload_cache=False):
        """Return ``(key_id, rsa_key)`` built from the actor's ``publicKey``.

        Raises ``KeyError`` when the actor document has no ``publicKey``
        entry (or that entry lacks ``id`` / ``publicKeyPem``), in addition
        to everything ``get`` can raise.
        """
        profile = self.get(actor_url, reload_cache=reload_cache)
        pub = profile["publicKey"]
        return pub["id"], RSA.importKey(pub["publicKeyPem"])

    def get_inbox_url(self, actor_url, reload_cache=False):
        """Return the actor's ``inbox`` value, or ``None`` if it has none."""
        profile = self.get(actor_url, reload_cache=reload_cache)
        return profile.get("inbox")