microblog.pub/app/outgoing_activities.py

304 lines
9.8 KiB
Python
Raw Normal View History

import asyncio
2022-06-22 13:11:22 -05:00
import email
import time
import traceback
from datetime import datetime
from datetime import timedelta
import httpx
from loguru import logger
2022-06-29 01:56:39 -05:00
from sqlalchemy import func
from sqlalchemy import select
2022-07-06 12:04:38 -05:00
from sqlalchemy.orm import joinedload
2022-06-22 13:11:22 -05:00
from app import activitypub as ap
2022-06-29 13:43:17 -05:00
from app import config
from app import ldsig
2022-06-22 13:11:22 -05:00
from app import models
2022-07-06 14:13:55 -05:00
from app.actor import LOCAL_ACTOR
from app.actor import _actor_hash
2022-07-04 13:25:27 -05:00
from app.config import KEY_PATH
2022-06-29 13:43:17 -05:00
from app.database import AsyncSession
from app.key import Key
from app.utils.datetime import now
2022-07-15 13:50:27 -05:00
from app.utils.url import check_url
from app.utils.workers import Worker
2022-06-22 13:11:22 -05:00
# Number of delivery attempts allowed per outgoing activity: once an activity's
# `tries` counter reaches this value, `_set_next_try` flags it as errored
# instead of scheduling another attempt.
_MAX_RETRIES = 16
2022-06-29 13:43:17 -05:00
# Module-level key built from the local actor ID and its "<ID>#main-key"
# identifier; it is handed to `ldsig.generate_signature` when signing payloads.
k = Key(config.ID, f"{config.ID}#main-key")
2022-07-04 13:25:27 -05:00
# Load the key material from the text stored at KEY_PATH. This runs at import
# time, so importing this module reads the key file.
k.load(KEY_PATH.read_text())
2022-06-22 13:11:22 -05:00
2022-06-29 13:43:17 -05:00
2022-07-06 14:13:55 -05:00
def _is_local_actor_updated() -> bool:
    """Tell whether the local actor changed since the last recorded hash.

    The hash of `LOCAL_ACTOR` is compared with the bytes stored in
    `<ROOT_DIR>/data/local_actor_hash.dat`. The cache file is (re)written
    whenever it is missing or stale.

    Returns:
        True only when a cached hash exists and differs from the current one.
        The very first run (no cache file yet) initializes the cache and
        returns False.
    """
    current_digest = _actor_hash(LOCAL_ACTOR)
    cache_file = config.ROOT_DIR / "data" / "local_actor_hash.dat"

    if cache_file.exists():
        if cache_file.read_bytes() == current_digest:
            logger.info("Local actor hasn't been updated")
            return False

        # Stale cache: remember the new hash and report the change.
        cache_file.write_bytes(current_digest)
        logger.info("Local actor has been updated")
        return True

    # No cache yet: nothing to compare against, so just record the hash.
    logger.info("Initializing local actor hash cache")
    cache_file.write_bytes(current_digest)
    return False
async def _send_actor_update_if_needed(
    db_session: AsyncSession,
) -> None:
    """Queue an `Update` activity for the local actor if its profile changed.

    The process for sending an update for the local actor is done here as
    in production, we may have multiple uvicorn workers and this worker will
    always run in a single process.

    When `_is_local_actor_updated()` reports a change, an `Update` activity
    wrapping the local actor is saved to the outbox and one outgoing activity
    is queued per recipient. The session is committed at the end.

    Args:
        db_session: session used to save the outbox object, query the
            recipients and queue the deliveries.
    """
    if not _is_local_actor_updated():
        return

    logger.info("Will send an Update for the local actor")

    # NOTE(review): imported inside the function rather than at module level,
    # presumably to avoid a circular import with `app.boxes` -- confirm.
    from app.boxes import allocate_outbox_id
    from app.boxes import outbox_object_id
    from app.boxes import save_outbox_object

    update_activity_id = allocate_outbox_id()
    update_activity = {
        "@context": ap.AS_EXTENDED_CTX,
        "id": outbox_object_id(update_activity_id),
        "type": "Update",
        "to": [ap.AS_PUBLIC],
        "actor": config.ID,
        "object": ap.remove_context(LOCAL_ACTOR.ap_actor),
    }
    outbox_object = await save_outbox_object(
        db_session, update_activity_id, update_activity
    )

    # Send the update to the followers and to every inbox we have ever
    # delivered an activity to.
    followers = (
        (
            await db_session.scalars(
                select(models.Follower).options(joinedload(models.Follower.actor))
            )
        )
        .unique()
        .all()
    )
    follower_inboxes = {
        follower.actor.shared_inbox_url or follower.actor.inbox_url
        for follower in followers
    }

    # Rows with a `webmention_target` have a webmention endpoint as recipient
    # (see `process_next_outgoing_activity`), not an ActivityPub inbox, so they
    # must not receive the Update.
    previously_contacted = {
        row.recipient
        for row in await db_session.execute(
            select(
                func.distinct(models.OutgoingActivity.recipient).label("recipient")
            ).where(models.OutgoingActivity.webmention_target.is_(None))
        )
    }

    for rcp in follower_inboxes | previously_contacted:  # type: ignore
        await new_outgoing_activity(
            db_session,
            recipient=rcp,
            outbox_object_id=outbox_object.id,
        )

    await db_session.commit()
2022-07-06 14:13:55 -05:00
2022-06-29 13:43:17 -05:00
async def new_outgoing_activity(
    db_session: AsyncSession,
    recipient: str,
    outbox_object_id: int | None = None,
    inbox_object_id: int | None = None,
    webmention_target: str | None = None,
) -> models.OutgoingActivity:
    """Create an `OutgoingActivity` row for `recipient` and return it.

    The row is added to the session, flushed and refreshed, but NOT
    committed; committing is left to the caller.

    Args:
        db_session: session the new row is attached to.
        recipient: URL the activity (or webmention) will be POSTed to.
        outbox_object_id: ID of the outbox object to deliver, if any.
        inbox_object_id: ID of the inbox object to deliver, if any.
        webmention_target: target URL when the delivery is a webmention.

    Raises:
        ValueError: if neither object ID is given, if a webmention does not
            reference an outbox object, or if both object IDs are given.
    """
    lacks_outbox_ref = outbox_object_id is None

    if lacks_outbox_ref and inbox_object_id is None:
        raise ValueError("Must reference at least one inbox/outbox activity")
    if webmention_target and lacks_outbox_ref:
        raise ValueError("Webmentions must reference an outbox activity")
    if outbox_object_id and inbox_object_id:
        raise ValueError("Cannot reference both inbox/outbox activities")

    queued = models.OutgoingActivity(
        recipient=recipient,
        outbox_object_id=outbox_object_id,
        inbox_object_id=inbox_object_id,
        webmention_target=webmention_target,
    )
    db_session.add(queued)

    # Flush then refresh so the returned instance carries the values generated
    # by the database.
    await db_session.flush()
    await db_session.refresh(queued)
    return queued
def _parse_retry_after(retry_after: str) -> datetime | None:
    """Convert a `Retry-After` header value into an absolute retry time.

    Both forms of the header are supported: a delay in seconds
    (`Retry-After: 120`) and an HTTP date
    (`Retry-After: Wed, 21 Oct 2015 07:28:00 GMT`).

    Args:
        retry_after: raw header value, as sent by the remote server.

    Returns:
        The time to retry at, or None when the value cannot be parsed or is
        out of the representable range. Callers treat None as "no hint" and
        fall back to their default schedule.
    """
    # `import email` alone does not load the `email.utils` submodule; import
    # the helpers explicitly instead of relying on another module having
    # loaded it as a side effect.
    from email.utils import mktime_tz
    from email.utils import parsedate_tz

    try:
        # Retry-After: 120
        seconds = int(retry_after)
    except ValueError:
        # Retry-After: Wed, 21 Oct 2015 07:28:00 GMT
        dt_tuple = parsedate_tz(retry_after)
        if dt_tuple is None:
            return None
        try:
            seconds = int(mktime_tz(dt_tuple) - time.time())
        except (ValueError, OverflowError):
            # Syntactically valid date with out-of-range fields.
            return None

    try:
        return now() + timedelta(seconds=seconds)
    except OverflowError:
        # The header comes from a remote server: an absurdly large delay must
        # not crash the delivery error handling.
        return None
def _exp_backoff(tries: int) -> datetime:
    """Return the next attempt time using an exponential backoff.

    The delay doubles with every attempt: 2s after the first try, 4s after
    the second, 8s after the third, and so on.

    Args:
        tries: number of attempts already made.
    """
    # 2 * 2 ** (tries - 1) is simply 2 ** tries.
    delay = timedelta(seconds=2**tries)
    return now() + delay
def _set_next_try(
    outgoing_activity: models.OutgoingActivity,
    next_try: datetime | None = None,
) -> None:
    """Schedule the next delivery attempt, or give up after too many tries.

    Args:
        outgoing_activity: activity that just failed; its `tries` counter must
            already have been incremented.
        next_try: explicit time for the next attempt (e.g. derived from a
            `Retry-After` header). When None, an exponential backoff based on
            `tries` is used.

    Raises:
        ValueError: if `tries` is unset or zero, i.e. the function was called
            before any attempt was recorded.
    """
    if not outgoing_activity.tries:
        raise ValueError("Should never happen")

    # `>=` rather than `==`: if the counter ever overshoots the limit (e.g. the
    # limit was lowered while activities were pending), the activity must
    # still be retired instead of being retried forever.
    if outgoing_activity.tries >= _MAX_RETRIES:
        outgoing_activity.is_errored = True
        outgoing_activity.next_try = None
    else:
        outgoing_activity.next_try = next_try or _exp_backoff(outgoing_activity.tries)
async def fetch_next_outgoing_activity(
    db_session: AsyncSession,
    in_fligh: set[int],
) -> models.OutgoingActivity | None:
    """Return the outgoing activity that is due the earliest, if any.

    An activity is eligible when its `next_try` is not in the future, it is
    neither errored nor already sent, and its ID is not in `in_fligh`.

    Args:
        db_session: session used to run the queries.
        in_fligh: IDs of the activities currently being processed; they are
            excluded from the selection.

    Returns:
        The eligible activity with the smallest `next_try`, with its inbox and
        outbox objects eagerly loaded, or None when nothing is due.
    """
    eligible = [
        models.OutgoingActivity.next_try <= now(),
        models.OutgoingActivity.is_errored.is_(False),
        models.OutgoingActivity.is_sent.is_(False),
        models.OutgoingActivity.id.not_in(in_fligh),
    ]

    count_stmt = select(func.count(models.OutgoingActivity.id)).where(*eligible)
    q_count = await db_session.scalar(count_stmt)
    if q_count > 0:
        logger.info(f"{q_count} outgoing activities ready to process")
    elif not q_count:
        return None

    next_stmt = (
        select(models.OutgoingActivity)
        .where(*eligible)
        .limit(1)
        .options(
            joinedload(models.OutgoingActivity.inbox_object),
            joinedload(models.OutgoingActivity.outbox_object),
        )
        .order_by(models.OutgoingActivity.next_try)
    )
    result = await db_session.execute(next_stmt)
    return result.scalar_one()
2022-06-22 13:11:22 -05:00
async def process_next_outgoing_activity(
    db_session: AsyncSession,
    next_activity: models.OutgoingActivity,
) -> None:
    """Attempt one delivery of `next_activity` and record the outcome.

    The attempt counter and `last_try` are updated first. The activity is
    then sent either as a webmention (form-encoded POST of source/target) or
    as an ActivityPub payload through `ap.post`.

    Outcome handling:
      * success: the activity is marked as sent;
      * HTTP 429/503: retried, honoring a `Retry-After` header when present;
      * other 4xx, except 401: marked as errored, never retried;
      * 401, every other status and any other exception: retried with the
        default schedule.

    The session is committed once the outcome has been recorded.

    Args:
        db_session: session the activity belongs to.
        next_activity: the activity to deliver.
    """
    next_activity.tries = next_activity.tries + 1  # type: ignore
    next_activity.last_try = now()

    logger.info(f"recipient={next_activity.recipient}")

    try:
        if next_activity.webmention_target and next_activity.outbox_object:
            webmention_payload = {
                "source": next_activity.outbox_object.url,
                "target": next_activity.webmention_target,
            }
            logger.info(f"{webmention_payload=}")
            check_url(next_activity.recipient)
            async with httpx.AsyncClient() as client:
                resp = await client.post(
                    next_activity.recipient,  # type: ignore
                    data=webmention_payload,
                    headers={"User-Agent": config.USER_AGENT},
                )
            resp.raise_for_status()
        else:
            source_object = next_activity.anybox_object
            payload = ap.wrap_object_if_needed(source_object.ap_object)

            # Use LD sig if the activity may need to be forwarded by
            # recipients, but only if the object is public (to help with
            # deniability/privacy)
            if (
                source_object.is_from_outbox
                and payload["type"] in ("Create", "Update", "Delete")
                and next_activity.outbox_object.visibility  # type: ignore
                == ap.VisibilityEnum.PUBLIC
            ):
                ldsig.generate_signature(payload, k)

            logger.info(f"{payload=}")
            resp = await ap.post(next_activity.recipient, payload)  # type: ignore
    except httpx.HTTPStatusError as http_error:
        logger.exception("Failed")
        failed_response = http_error.response
        status_code = failed_response.status_code

        next_activity.last_status_code = status_code
        next_activity.last_response = failed_response.text
        next_activity.error = traceback.format_exc()

        if status_code in (429, 503):
            # The server asked us to slow down: follow its hint when it gave one.
            retry_after: datetime | None = None
            if retry_after_value := failed_response.headers.get("Retry-After"):
                retry_after = _parse_retry_after(retry_after_value)
            _set_next_try(next_activity, retry_after)
        elif status_code != 401 and 400 <= status_code < 500:
            # Client errors other than 401 are considered permanent.
            logger.info(f"status_code={status_code} not retrying")
            next_activity.is_errored = True
            next_activity.next_try = None
        else:
            # 401 and everything outside the 4xx range is retried.
            _set_next_try(next_activity)
    except Exception:
        logger.exception("Failed")
        next_activity.error = traceback.format_exc()
        _set_next_try(next_activity)
    else:
        logger.info("Success")
        next_activity.is_sent = True
        next_activity.last_status_code = resp.status_code
        next_activity.last_response = resp.text

    await db_session.commit()
class OutgoingActivityWorker(Worker[models.OutgoingActivity]):
    """`Worker` specialization that delivers queued outgoing activities.

    Each method delegates to a module-level function.
    NOTE(review): when and how often the `Worker` base class invokes these
    methods is defined in `app.utils.workers` -- confirm there.
    """

    async def process_message(
        self,
        db_session: AsyncSession,
        next_activity: models.OutgoingActivity,
    ) -> None:
        """Try to deliver `next_activity` and persist the outcome."""
        await process_next_outgoing_activity(db_session, next_activity)

    async def get_next_message(
        self,
        db_session: AsyncSession,
    ) -> models.OutgoingActivity | None:
        """Return the next activity due, skipping the ones already in flight."""
        busy_ids = self.in_flight_ids()
        return await fetch_next_outgoing_activity(db_session, busy_ids)

    async def startup(self, db_session: AsyncSession) -> None:
        """Queue an `Update` for the local actor if its profile changed."""
        await _send_actor_update_if_needed(db_session)
2022-06-22 13:11:22 -05:00
async def loop() -> None:
    """Run an `OutgoingActivityWorker` configured with `workers_count=3`."""
    worker = OutgoingActivityWorker(workers_count=3)
    await worker.run_forever()


# Allow running the delivery worker directly as a script.
if __name__ == "__main__":
    asyncio.run(loop())