microblog.pub/utils/object_service.py
2018-06-17 19:21:59 +02:00

115 lines
3.4 KiB
Python

from urllib.parse import urlparse
import requests
from .errors import ActivityNotFoundError
from .urlutils import check_url
class ObjectService(object):
def __init__(self, user_agent, col, inbox, outbox, instances):
self._user_agent = user_agent
self._col = col
self._inbox = inbox
self._outbox = outbox
self._instances = instances
self._known_instances = set()
def _fetch_remote(self, object_id):
print(f"fetch remote {object_id}")
check_url(object_id)
resp = requests.get(
object_id,
headers={
"Accept": "application/activity+json",
"User-Agent": self._user_agent,
},
)
if resp.status_code == 404:
raise ActivityNotFoundError(
f"{object_id} cannot be fetched, 404 error not found"
)
resp.raise_for_status()
return resp.json()
def _fetch(self, object_id):
instance = urlparse(object_id)._replace(path="", query="", fragment="").geturl()
if instance not in self._known_instances:
self._known_instances.add(instance)
if not self._instances.find_one({"instance": instance}):
self._instances.insert(
{"instance": instance, "first_object": object_id}
)
obj = self._inbox.find_one(
{
"$or": [
{"remote_id": object_id},
{"type": "Create", "activity.object.id": object_id},
]
}
)
if obj:
if obj["remote_id"] == object_id:
return obj["activity"]
return obj["activity"]["object"]
obj = self._outbox.find_one(
{
"$or": [
{"remote_id": object_id},
{"type": "Create", "activity.object.id": object_id},
]
}
)
if obj:
if obj["remote_id"] == object_id:
return obj["activity"]
return obj["activity"]["object"]
return self._fetch_remote(object_id)
def get(
self,
object_id,
reload_cache=False,
part_of_stream=False,
announce_published=None,
):
if reload_cache:
obj = self._fetch(object_id)
self._col.update(
{"object_id": object_id},
{
"$set": {
"cached_object": obj,
"meta.part_of_stream": part_of_stream,
"meta.announce_published": announce_published,
}
},
upsert=True,
)
return obj
cached_object = self._col.find_one({"object_id": object_id})
if cached_object:
print(f"ObjectService: {cached_object}")
return cached_object["cached_object"]
obj = self._fetch(object_id)
self._col.update(
{"object_id": object_id},
{
"$set": {
"cached_object": obj,
"meta.part_of_stream": part_of_stream,
"meta.announce_published": announce_published,
}
},
upsert=True,
)
# print(f'ObjectService: {obj}')
return obj