microblog.pub/utils/object_service.py

116 lines
3.4 KiB
Python
Raw Normal View History

2018-05-18 13:41:41 -05:00
from urllib.parse import urlparse
2018-06-16 15:02:10 -05:00
import requests
2018-05-29 14:36:05 -05:00
from .errors import ActivityNotFoundError
2018-06-16 15:02:10 -05:00
from .urlutils import check_url
2018-05-25 17:03:30 -05:00
2018-05-18 13:41:41 -05:00
class ObjectService(object):
    """Resolve objects by ID using a cache, local collections, then HTTP.

    Lookup order for an object ID:

    1. the cache collection (``col``), unless a reload is requested;
    2. the ``inbox`` collection, then the ``outbox`` collection;
    3. an HTTP GET on the ID itself, asking for ``application/activity+json``.

    The collection arguments only need to expose the methods used here:
    ``find_one`` on all of them, ``insert`` on ``instances`` and ``update``
    (with an ``upsert`` keyword) on ``col``.

    NOTE(review): ``insert``/``update`` look like the legacy PyMongo
    collection API; they are left untouched because the collection objects
    are injected by the caller -- confirm before migrating to
    ``insert_one``/``update_one``.
    """

    # Seconds to wait on a remote server before giving up. Without a timeout
    # a single unresponsive host would block the caller indefinitely.
    DEFAULT_TIMEOUT = 15

    def __init__(
        self,
        user_agent,
        col,
        inbox,
        outbox,
        instances,
        timeout=DEFAULT_TIMEOUT,
    ):
        """Store the collaborators used by the lookups.

        Args:
            user_agent: Value sent as the ``User-Agent`` header on remote
                fetches.
            col: Cache collection, keyed by ``object_id``.
            inbox: Collection searched first for a locally known object.
            outbox: Collection searched second for a locally known object.
            instances: Collection recording each remote origin seen, together
                with the first object ID that was requested from it.
            timeout: Timeout in seconds passed to ``requests.get`` for remote
                fetches. Defaults to ``DEFAULT_TIMEOUT``.
        """
        self._user_agent = user_agent
        self._col = col
        self._inbox = inbox
        self._outbox = outbox
        self._instances = instances
        self._timeout = timeout
        # Origins already checked by this process, so the ``instances``
        # collection is queried at most once per origin.
        self._known_instances = set()

    def _fetch_remote(self, object_id):
        """Fetch ``object_id`` over HTTP and return the decoded JSON body.

        Raises:
            ActivityNotFoundError: If the server answers with HTTP 404.
            requests.HTTPError: For any other error status, raised by
                ``raise_for_status``.
        """
        print(f"fetch remote {object_id}")
        # check_url comes from .urlutils; it is called before any request is
        # made (presumably it rejects unsafe URLs -- confirm in urlutils).
        check_url(object_id)

        resp = requests.get(
            object_id,
            headers={
                "Accept": "application/activity+json",
                "User-Agent": self._user_agent,
            },
            timeout=self._timeout,
        )
        # A 404 gets its own exception type so callers can tell "gone" apart
        # from other HTTP failures.
        if resp.status_code == 404:
            raise ActivityNotFoundError(
                f"{object_id} cannot be fetched, 404 error not found"
            )

        resp.raise_for_status()
        return resp.json()

    def _register_instance(self, object_id):
        """Record the origin (scheme and host) of ``object_id`` if it is new."""
        # Clear every component after the network location, including the
        # rarely used ";params" part, so all URLs on a host share one key.
        instance = (
            urlparse(object_id)
            ._replace(path="", params="", query="", fragment="")
            .geturl()
        )
        if instance in self._known_instances:
            return

        self._known_instances.add(instance)
        if not self._instances.find_one({"instance": instance}):
            self._instances.insert(
                {"instance": instance, "first_object": object_id}
            )

    @staticmethod
    def _local_query(object_id):
        """Build the filter matching an activity or a ``Create``'s object."""
        return {
            "$or": [
                {"remote_id": object_id},
                {"type": "Create", "activity.object.id": object_id},
            ]
        }

    @staticmethod
    def _unwrap(doc, object_id):
        """Return the part of a stored document that ``object_id`` names.

        If the document's ``remote_id`` equals ``object_id`` the stored
        activity is returned; otherwise the document is treated as matched
        through its embedded object, and that object is returned.
        """
        # .get(): a document matched only through "activity.object.id" may
        # lack a "remote_id" field, which must not raise KeyError.
        if doc.get("remote_id") == object_id:
            return doc["activity"]
        return doc["activity"]["object"]

    def _fetch(self, object_id):
        """Resolve ``object_id`` from the inbox, the outbox, or the network.

        Also records the origin of ``object_id`` in the ``instances``
        collection the first time that origin is seen.
        """
        self._register_instance(object_id)

        # The inbox is consulted before the outbox; the first hit wins.
        for collection in (self._inbox, self._outbox):
            doc = collection.find_one(self._local_query(object_id))
            if doc:
                return self._unwrap(doc, object_id)

        return self._fetch_remote(object_id)

    def _store(self, object_id, obj, part_of_stream, announce_published):
        """Upsert ``obj`` and its metadata into the cache collection."""
        self._col.update(
            {"object_id": object_id},
            {
                "$set": {
                    "cached_object": obj,
                    "meta.part_of_stream": part_of_stream,
                    "meta.announce_published": announce_published,
                }
            },
            upsert=True,
        )

    def get(
        self,
        object_id,
        reload_cache=False,
        part_of_stream=False,
        announce_published=None,
    ):
        """Return the object identified by ``object_id``.

        Args:
            object_id: ID (URL) of the object to resolve.
            reload_cache: When true, skip the cache lookup, resolve the
                object again and overwrite the cache entry.
            part_of_stream: Stored as ``meta.part_of_stream`` whenever the
                cache entry is written.
            announce_published: Stored as ``meta.announce_published``
                whenever the cache entry is written.

        Returns:
            The cached object on a cache hit; otherwise the freshly resolved
            object, which is written to the cache before returning.
        """
        if not reload_cache:
            cached = self._col.find_one({"object_id": object_id})
            if cached:
                print(f"ObjectService: {cached}")
                # On a hit the metadata arguments are ignored: nothing is
                # written back to the cache.
                return cached["cached_object"]

        obj = self._fetch(object_id)
        self._store(object_id, obj, part_of_stream, announce_published)
        return obj