Cleanup and add a unique request ID

This commit is contained in:
Thomas Sileo 2019-08-05 22:40:24 +02:00
parent 16f4af0463
commit 5ea22edcb8
8 changed files with 86 additions and 54 deletions

77
app.py
View file

@ -3,12 +3,13 @@ import logging
import os import os
import traceback import traceback
from datetime import datetime from datetime import datetime
from urllib.parse import urlparse from uuid import uuid4
from bson.objectid import ObjectId from bson.objectid import ObjectId
from flask import Flask from flask import Flask
from flask import Response from flask import Response
from flask import abort from flask import abort
from flask import g
from flask import jsonify as flask_jsonify from flask import jsonify as flask_jsonify
from flask import redirect from flask import redirect
from flask import render_template from flask import render_template
@ -49,8 +50,10 @@ from core.meta import Box
from core.meta import MetaKey from core.meta import MetaKey
from core.meta import _meta from core.meta import _meta
from core.meta import by_remote_id from core.meta import by_remote_id
from core.meta import by_type
from core.meta import in_outbox from core.meta import in_outbox
from core.meta import is_public from core.meta import is_public
from core.meta import not_undo
from core.shared import _build_thread from core.shared import _build_thread
from core.shared import _get_ip from core.shared import _get_ip
from core.shared import csrf from core.shared import csrf
@ -59,6 +62,7 @@ from core.shared import jsonify
from core.shared import login_required from core.shared import login_required
from core.shared import noindex from core.shared import noindex
from core.shared import paginated_query from core.shared import paginated_query
from utils.blacklist import is_blacklisted
from utils.key import get_secret_key from utils.key import get_secret_key
from utils.template_filters import filters from utils.template_filters import filters
@ -87,14 +91,6 @@ else:
root_logger.setLevel(gunicorn_logger.level) root_logger.setLevel(gunicorn_logger.level)
def is_blacklisted(url: str) -> bool:
    """Tell whether the URL's host is part of the configured blacklist.

    Parsing or lookup failures are logged and treated as "not blacklisted"
    so a malformed URL can never block request processing.
    """
    try:
        netloc = urlparse(url).netloc
        return netloc in config.BLACKLIST
    except Exception:
        logger.exception(f"failed to blacklist for {url}")
        return False
@app.context_processor @app.context_processor
def inject_config(): def inject_config():
q = { q = {
@ -153,18 +149,24 @@ def inject_config():
) )
@app.before_request
def generate_request_id():
    """Attach a unique hex ID to the request context (flask.g) so that
    log lines and error responses for one request can be correlated."""
    g.request_id = uuid4().hex
@app.after_request @app.after_request
def set_x_powered_by(response): def set_x_powered_by(response):
response.headers["X-Powered-By"] = "microblog.pub" response.headers["X-Powered-By"] = "microblog.pub"
response.headers["X-Request-ID"] = g.request_id
return response return response
@app.errorhandler(ValueError) @app.errorhandler(ValueError)
def handle_value_error(error): def handle_value_error(error):
logger.error( logger.error(
f"caught value error: {error!r}, {traceback.format_tb(error.__traceback__)}" f"caught value error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
) )
response = flask_jsonify(message=error.args[0]) response = flask_jsonify(message=error.args[0], request_id=g.request_id)
response.status_code = 400 response.status_code = 400
return response return response
@ -172,9 +174,9 @@ def handle_value_error(error):
@app.errorhandler(Error) @app.errorhandler(Error)
def handle_activitypub_error(error): def handle_activitypub_error(error):
logger.error( logger.error(
f"caught activitypub error {error!r}, {traceback.format_tb(error.__traceback__)}" f"caught activitypub error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
) )
response = flask_jsonify(error.to_dict()) response = flask_jsonify({**error.to_dict(), "request_id": g.request_id})
response.status_code = error.status_code response.status_code = error.status_code
return response return response
@ -182,9 +184,9 @@ def handle_activitypub_error(error):
@app.errorhandler(TaskError) @app.errorhandler(TaskError)
def handle_task_error(error): def handle_task_error(error):
logger.error( logger.error(
f"caught activitypub error {error!r}, {traceback.format_tb(error.__traceback__)}" f"caught task error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
) )
response = flask_jsonify({"traceback": error.message}) response = flask_jsonify({"traceback": error.message, "request_id": g.request_id})
response.status_code = 500 response.status_code = 500
return response return response
@ -274,7 +276,6 @@ def index():
"meta.public": True, "meta.public": True,
"$or": [{"meta.pinned": False}, {"meta.pinned": {"$exists": False}}], "$or": [{"meta.pinned": False}, {"meta.pinned": {"$exists": False}}],
} }
print(list(DB.activities.find(q)))
pinned = [] pinned = []
# Only fetch the pinned notes if we're on the first page # Only fetch the pinned notes if we're on the first page
@ -417,7 +418,6 @@ def outbox():
abort(401) abort(401)
data = request.get_json(force=True) data = request.get_json(force=True)
print(data)
activity = ap.parse_activity(data) activity = ap.parse_activity(data)
activity_id = post_to_outbox(activity) activity_id = post_to_outbox(activity)
@ -603,32 +603,21 @@ def inbox():
return Response( return Response(
status=422, status=422,
headers={"Content-Type": "application/json"}, headers={"Content-Type": "application/json"},
response=json.dumps({"error": "failed to decode request as JSON"}), response=json.dumps(
{
"error": "failed to decode request body as JSON",
"request_id": g.request_id,
}
),
) )
# Check the blacklist now to see if we can return super early # Check the blacklist now to see if we can return super early
if ( if is_blacklisted(data):
"id" in data
and is_blacklisted(data["id"])
or (
"object" in data
and isinstance(data["object"], dict)
and "id" in data["object"]
and is_blacklisted(data["object"]["id"])
)
or (
"object" in data
and isinstance(data["object"], str)
and is_blacklisted(data["object"])
)
):
logger.info(f"dropping activity from blacklisted host: {data['id']}") logger.info(f"dropping activity from blacklisted host: {data['id']}")
return Response(status=201) return Response(status=201)
print(f"req_headers={request.headers}") logger.info(f"request_id={g.request_id} req_headers={request.headers!r}")
print(f"raw_data={data}") logger.info(f"request_id={g.request_id} raw_data={data}")
logger.debug(f"req_headers={request.headers}")
logger.debug(f"raw_data={data}")
try: try:
if not verify_request( if not verify_request(
request.method, request.path, request.headers, request.data request.method, request.path, request.headers, request.data
@ -636,7 +625,7 @@ def inbox():
raise Exception("failed to verify request") raise Exception("failed to verify request")
except Exception: except Exception:
logger.exception( logger.exception(
"failed to verify request, trying to verify the payload by fetching the remote" f"failed to verify request {g.request_id}, trying to verify the payload by fetching the remote"
) )
try: try:
remote_data = get_backend().fetch_iri(data["id"]) remote_data = get_backend().fetch_iri(data["id"])
@ -669,7 +658,8 @@ def inbox():
headers={"Content-Type": "application/json"}, headers={"Content-Type": "application/json"},
response=json.dumps( response=json.dumps(
{ {
"error": "failed to verify request (using HTTP signatures or fetching the IRI)" "error": "failed to verify request (using HTTP signatures or fetching the IRI)",
"request_id": g.request_id,
} }
), ),
) )
@ -688,6 +678,7 @@ def inbox():
"geoip": geoip, "geoip": geoip,
"tb": traceback.format_exc(), "tb": traceback.format_exc(),
"headers": dict(request.headers), "headers": dict(request.headers),
"request_id": g.request_id,
}, },
} }
) )
@ -697,16 +688,16 @@ def inbox():
headers={"Content-Type": "application/json"}, headers={"Content-Type": "application/json"},
response=json.dumps( response=json.dumps(
{ {
"error": "failed to verify request (using HTTP signatures or fetching the IRI)" "error": "failed to verify request (using HTTP signatures or fetching the IRI)",
"request_id": g.request_id,
} }
), ),
) )
# We fetched the remote data successfully # We fetched the remote data successfully
data = remote_data data = remote_data
print(data)
activity = ap.parse_activity(data) activity = ap.parse_activity(data)
logger.debug(f"inbox activity={activity}/{data}") logger.debug(f"inbox activity={g.request_id}/{activity}/{data}")
post_to_inbox(activity) post_to_inbox(activity)
return Response(status=201) return Response(status=201)
@ -741,7 +732,7 @@ def followers():
@app.route("/following") @app.route("/following")
def following(): def following():
q = {"box": Box.OUTBOX.value, "type": ActivityType.FOLLOW.value, "meta.undo": False} q = {**in_outbox(), **by_type(ActivityType.FOLLOW), **not_undo()}
if is_api_request(): if is_api_request():
return jsonify( return jsonify(

View file

@ -196,7 +196,7 @@ def task_finish_post_to_inbox() -> _Response:
except (ActivityGoneError, ActivityNotFoundError, NotAnActivityError): except (ActivityGoneError, ActivityNotFoundError, NotAnActivityError):
app.logger.exception(f"no retry") app.logger.exception(f"no retry")
except Exception as err: except Exception as err:
app.logger.exception(f"failed to cache attachments for {iri}") app.logger.exception(f"failed to finish post to inbox for {iri}")
raise TaskError() from err raise TaskError() from err
return "" return ""

View file

@ -406,6 +406,8 @@ class MicroblogPubBackend(Backend):
new_threads = [] new_threads = []
root_reply = in_reply_to root_reply = in_reply_to
reply = ap.fetch_remote_activity(root_reply) reply = ap.fetch_remote_activity(root_reply)
# FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that)
# FIXME(tsileo): can be a 403 too, in this case what to do? not error at least
# Ensure that this is a local reply, of a question, with a direct "to" addressing # Ensure that this is a local reply, of a question, with a direct "to" addressing
if ( if (
@ -427,7 +429,6 @@ class MicroblogPubBackend(Backend):
) )
return None return None
print(f"processing {create!r} and incrementing {in_reply_to}")
creply = DB.activities.find_one_and_update( creply = DB.activities.find_one_and_update(
{"activity.object.id": in_reply_to}, {"activity.object.id": in_reply_to},
{"$inc": {"meta.count_reply": 1, "meta.count_direct_reply": 1}}, {"$inc": {"meta.count_reply": 1, "meta.count_direct_reply": 1}},
@ -444,6 +445,7 @@ class MicroblogPubBackend(Backend):
break break
root_reply = in_reply_to root_reply = in_reply_to
reply = ap.fetch_remote_activity(root_reply) reply = ap.fetch_remote_activity(root_reply)
# FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that)
q = {"activity.object.id": root_reply} q = {"activity.object.id": root_reply}
if not DB.activities.count(q): if not DB.activities.count(q):
save(Box.REPLIES, reply) save(Box.REPLIES, reply)

View file

@ -38,8 +38,9 @@ def process_inbox(activity: ap.BaseActivity, new_meta: _NewMeta) -> None:
def _delete_process_inbox(delete: ap.Delete, new_meta: _NewMeta) -> None: def _delete_process_inbox(delete: ap.Delete, new_meta: _NewMeta) -> None:
_logger.info(f"process_inbox activity={delete!r}") _logger.info(f"process_inbox activity={delete!r}")
obj_id = delete.get_object_id() obj_id = delete.get_object_id()
_logger.debug("delete object={obj_id}") _logger.debug(f"delete object={obj_id}")
try: try:
# FIXME(tsileo): call the DB here instead? like for the oubox
obj = ap.fetch_remote_activity(obj_id) obj = ap.fetch_remote_activity(obj_id)
_logger.info(f"inbox_delete handle_replies obj={obj!r}") _logger.info(f"inbox_delete handle_replies obj={obj!r}")
in_reply_to = obj.get_in_reply_to() if obj.inReplyTo else None in_reply_to = obj.get_in_reply_to() if obj.inReplyTo else None

38
utils/blacklist.py Normal file
View file

@ -0,0 +1,38 @@
import logging
from typing import Any
from typing import Dict
from urllib.parse import urlparse
import config
logger = logging.getLogger(__name__)
def is_url_blacklisted(url: str) -> bool:
    """Return True if the URL's host (netloc) is in the configured blacklist.

    Any error while parsing/checking is logged and treated as "not
    blacklisted" so that a malformed URL cannot break inbox processing.
    """
    try:
        return urlparse(url).netloc in config.BLACKLIST
    except Exception:
        # Fixed log message: the failure is checking the blacklist,
        # not blacklisting the URL.
        logger.exception(f"failed to check the blacklist for {url}")
        return False
def is_blacklisted(data: Dict[str, Any]) -> bool:
    """Returns True if the activity is coming/or referencing a blacklisted host."""
    # The activity's own ID.
    if "id" in data and is_url_blacklisted(data["id"]):
        return True

    # The wrapped object: either an embedded dict with its own ID,
    # or a bare IRI string.
    obj = data.get("object")
    if isinstance(obj, dict) and "id" in obj and is_url_blacklisted(obj["id"]):
        return True
    if isinstance(obj, str) and is_url_blacklisted(obj):
        return True

    return False

View file

@ -1,6 +1,7 @@
import base64 import base64
import mimetypes import mimetypes
from enum import Enum from enum import Enum
from enum import unique
from gzip import GzipFile from gzip import GzipFile
from io import BytesIO from io import BytesIO
from typing import Any from typing import Any
@ -13,7 +14,7 @@ from little_boxes import activitypub as ap
from PIL import Image from PIL import Image
def load(url, user_agent): def load(url: str, user_agent: str) -> Image:
"""Initializes a `PIL.Image` from the URL.""" """Initializes a `PIL.Image` from the URL."""
with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp: with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
resp.raise_for_status() resp.raise_for_status()
@ -22,7 +23,7 @@ def load(url, user_agent):
return Image.open(BytesIO(resp.raw.read())) return Image.open(BytesIO(resp.raw.read()))
def to_data_uri(img): def to_data_uri(img: Image) -> str:
out = BytesIO() out = BytesIO()
img.save(out, format=img.format) img.save(out, format=img.format)
out.seek(0) out.seek(0)
@ -30,6 +31,7 @@ def to_data_uri(img):
return f"data:{img.get_format_mimetype()};base64,{data}" return f"data:{img.get_format_mimetype()};base64,{data}"
@unique
class Kind(Enum): class Kind(Enum):
ATTACHMENT = "attachment" ATTACHMENT = "attachment"
ACTOR_ICON = "actor_icon" ACTOR_ICON = "actor_icon"

View file

@ -1,4 +1,7 @@
import logging import logging
from typing import Any
from typing import Dict
from typing import Set
from urllib.parse import urlparse from urllib.parse import urlparse
import opengraph import opengraph
@ -14,7 +17,7 @@ from .lookup import lookup
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def links_from_note(note): def links_from_note(note: Dict[str, Any]) -> Set[str]:
note_host = urlparse(ap._get_id(note["id"]) or "").netloc note_host = urlparse(ap._get_id(note["id"]) or "").netloc
links = set() links = set()

View file

@ -170,7 +170,6 @@ def url_or_id(d):
@filters.app_template_filter() @filters.app_template_filter()
def get_url(u): def get_url(u):
print(f"GET_URL({u!r})")
if isinstance(u, list): if isinstance(u, list):
for l in u: for l in u:
if l.get("mimeType") == "text/html": if l.get("mimeType") == "text/html":
@ -191,7 +190,6 @@ def get_actor(url):
url = url[0] url = url[0]
if isinstance(url, dict): if isinstance(url, dict):
url = url.get("id") url = url.get("id")
print(f"GET_ACTOR {url}")
try: try:
return ap.get_backend().fetch_iri(url) return ap.get_backend().fetch_iri(url)
except (ActivityNotFoundError, ActivityGoneError): except (ActivityNotFoundError, ActivityGoneError):
@ -203,8 +201,6 @@ def get_actor(url):
@filters.app_template_filter() @filters.app_template_filter()
def get_answer_count(choice, obj, meta): def get_answer_count(choice, obj, meta):
count_from_meta = meta.get("question_answers", {}).get(_answer_key(choice), 0) count_from_meta = meta.get("question_answers", {}).get(_answer_key(choice), 0)
print(count_from_meta)
print(choice, obj, meta)
if count_from_meta: if count_from_meta:
return count_from_meta return count_from_meta
for option in obj.get("oneOf", obj.get("anyOf", [])): for option in obj.get("oneOf", obj.get("anyOf", [])):
@ -219,7 +215,6 @@ def get_total_answers_count(obj, meta):
return cached return cached
cnt = 0 cnt = 0
for choice in obj.get("anyOf", obj.get("oneOf", [])): for choice in obj.get("anyOf", obj.get("oneOf", [])):
print(choice)
cnt += choice.get("replies", {}).get("totalItems", 0) cnt += choice.get("replies", {}).get("totalItems", 0)
return cnt return cnt