Cleanup and add a unique request ID

This commit is contained in:
Thomas Sileo 2019-08-05 22:40:24 +02:00
parent 16f4af0463
commit 5ea22edcb8
8 changed files with 86 additions and 54 deletions

77
app.py
View file

@ -3,12 +3,13 @@ import logging
import os
import traceback
from datetime import datetime
from urllib.parse import urlparse
from uuid import uuid4
from bson.objectid import ObjectId
from flask import Flask
from flask import Response
from flask import abort
from flask import g
from flask import jsonify as flask_jsonify
from flask import redirect
from flask import render_template
@ -49,8 +50,10 @@ from core.meta import Box
from core.meta import MetaKey
from core.meta import _meta
from core.meta import by_remote_id
from core.meta import by_type
from core.meta import in_outbox
from core.meta import is_public
from core.meta import not_undo
from core.shared import _build_thread
from core.shared import _get_ip
from core.shared import csrf
@ -59,6 +62,7 @@ from core.shared import jsonify
from core.shared import login_required
from core.shared import noindex
from core.shared import paginated_query
from utils.blacklist import is_blacklisted
from utils.key import get_secret_key
from utils.template_filters import filters
@ -87,14 +91,6 @@ else:
root_logger.setLevel(gunicorn_logger.level)
def is_blacklisted(url: str) -> bool:
try:
return urlparse(url).netloc in config.BLACKLIST
except Exception:
logger.exception(f"failed to blacklist for {url}")
return False
@app.context_processor
def inject_config():
q = {
@ -153,18 +149,24 @@ def inject_config():
)
@app.before_request
def generate_request_id():
    """Attach a unique hex request ID to the per-request context (flask.g).

    Runs before every request; the ID is later echoed in the X-Request-ID
    response header and embedded in error payloads/log lines for correlation.
    """
    g.request_id = uuid4().hex
@app.after_request
def set_x_powered_by(response):
    """Add branding and correlation headers to every outgoing response."""
    response.headers["X-Powered-By"] = "microblog.pub"
    # Echo the per-request ID set by the before_request hook so clients and
    # logs can be correlated.
    response.headers["X-Request-ID"] = g.request_id
    return response
@app.errorhandler(ValueError)
def handle_value_error(error):
logger.error(
f"caught value error: {error!r}, {traceback.format_tb(error.__traceback__)}"
f"caught value error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
)
response = flask_jsonify(message=error.args[0])
response = flask_jsonify(message=error.args[0], request_id=g.request_id)
response.status_code = 400
return response
@ -172,9 +174,9 @@ def handle_value_error(error):
@app.errorhandler(Error)
def handle_activitypub_error(error):
logger.error(
f"caught activitypub error {error!r}, {traceback.format_tb(error.__traceback__)}"
f"caught activitypub error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
)
response = flask_jsonify(error.to_dict())
response = flask_jsonify({**error.to_dict(), "request_id": g.request_id})
response.status_code = error.status_code
return response
@ -182,9 +184,9 @@ def handle_activitypub_error(error):
@app.errorhandler(TaskError)
def handle_task_error(error):
logger.error(
f"caught activitypub error {error!r}, {traceback.format_tb(error.__traceback__)}"
f"caught activitypub error for {g.request_id}: {error!r}, {traceback.format_tb(error.__traceback__)}"
)
response = flask_jsonify({"traceback": error.message})
response = flask_jsonify({"traceback": error.message, "request_id": g.request_id})
response.status_code = 500
return response
@ -274,7 +276,6 @@ def index():
"meta.public": True,
"$or": [{"meta.pinned": False}, {"meta.pinned": {"$exists": False}}],
}
print(list(DB.activities.find(q)))
pinned = []
# Only fetch the pinned notes if we're on the first page
@ -417,7 +418,6 @@ def outbox():
abort(401)
data = request.get_json(force=True)
print(data)
activity = ap.parse_activity(data)
activity_id = post_to_outbox(activity)
@ -603,32 +603,21 @@ def inbox():
return Response(
status=422,
headers={"Content-Type": "application/json"},
response=json.dumps({"error": "failed to decode request as JSON"}),
response=json.dumps(
{
"error": "failed to decode request body as JSON",
"request_id": g.request_id,
}
),
)
# Check the blacklist now to see if we can return super early
if (
"id" in data
and is_blacklisted(data["id"])
or (
"object" in data
and isinstance(data["object"], dict)
and "id" in data["object"]
and is_blacklisted(data["object"]["id"])
)
or (
"object" in data
and isinstance(data["object"], str)
and is_blacklisted(data["object"])
)
):
if is_blacklisted(data):
logger.info(f"dropping activity from blacklisted host: {data['id']}")
return Response(status=201)
print(f"req_headers={request.headers}")
print(f"raw_data={data}")
logger.debug(f"req_headers={request.headers}")
logger.debug(f"raw_data={data}")
logger.info(f"request_id={g.request_id} req_headers={request.headers!r}")
logger.info(f"request_id={g.request_id} raw_data={data}")
try:
if not verify_request(
request.method, request.path, request.headers, request.data
@ -636,7 +625,7 @@ def inbox():
raise Exception("failed to verify request")
except Exception:
logger.exception(
"failed to verify request, trying to verify the payload by fetching the remote"
f"failed to verify request {g.request_id}, trying to verify the payload by fetching the remote"
)
try:
remote_data = get_backend().fetch_iri(data["id"])
@ -669,7 +658,8 @@ def inbox():
headers={"Content-Type": "application/json"},
response=json.dumps(
{
"error": "failed to verify request (using HTTP signatures or fetching the IRI)"
"error": "failed to verify request (using HTTP signatures or fetching the IRI)",
"request_id": g.request_id,
}
),
)
@ -688,6 +678,7 @@ def inbox():
"geoip": geoip,
"tb": traceback.format_exc(),
"headers": dict(request.headers),
"request_id": g.request_id,
},
}
)
@ -697,16 +688,16 @@ def inbox():
headers={"Content-Type": "application/json"},
response=json.dumps(
{
"error": "failed to verify request (using HTTP signatures or fetching the IRI)"
"error": "failed to verify request (using HTTP signatures or fetching the IRI)",
"request_id": g.request_id,
}
),
)
# We fetched the remote data successfully
data = remote_data
print(data)
activity = ap.parse_activity(data)
logger.debug(f"inbox activity={activity}/{data}")
logger.debug(f"inbox activity={g.request_id}/{activity}/{data}")
post_to_inbox(activity)
return Response(status=201)
@ -741,7 +732,7 @@ def followers():
@app.route("/following")
def following():
q = {"box": Box.OUTBOX.value, "type": ActivityType.FOLLOW.value, "meta.undo": False}
q = {**in_outbox(), **by_type(ActivityType.FOLLOW), **not_undo()}
if is_api_request():
return jsonify(

View file

@ -196,7 +196,7 @@ def task_finish_post_to_inbox() -> _Response:
except (ActivityGoneError, ActivityNotFoundError, NotAnActivityError):
app.logger.exception(f"no retry")
except Exception as err:
app.logger.exception(f"failed to cache attachments for {iri}")
app.logger.exception(f"failed to finish post to inbox for {iri}")
raise TaskError() from err
return ""

View file

@ -406,6 +406,8 @@ class MicroblogPubBackend(Backend):
new_threads = []
root_reply = in_reply_to
reply = ap.fetch_remote_activity(root_reply)
# FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that)
# FIXME(tsileo): can be a 403 too, in this case what to do? not error at least
# Ensure that this is a local reply, of a question, with a direct "to" addressing
if (
@ -427,7 +429,6 @@ class MicroblogPubBackend(Backend):
)
return None
print(f"processing {create!r} and incrementing {in_reply_to}")
creply = DB.activities.find_one_and_update(
{"activity.object.id": in_reply_to},
{"$inc": {"meta.count_reply": 1, "meta.count_direct_reply": 1}},
@ -444,6 +445,7 @@ class MicroblogPubBackend(Backend):
break
root_reply = in_reply_to
reply = ap.fetch_remote_activity(root_reply)
# FIXME(tsileo): can be a Create here (instead of a Note, Hubzilla's doing that)
q = {"activity.object.id": root_reply}
if not DB.activities.count(q):
save(Box.REPLIES, reply)

View file

@ -38,8 +38,9 @@ def process_inbox(activity: ap.BaseActivity, new_meta: _NewMeta) -> None:
def _delete_process_inbox(delete: ap.Delete, new_meta: _NewMeta) -> None:
_logger.info(f"process_inbox activity={delete!r}")
obj_id = delete.get_object_id()
_logger.debug("delete object={obj_id}")
_logger.debug(f"delete object={obj_id}")
try:
# FIXME(tsileo): call the DB here instead? like for the oubox
obj = ap.fetch_remote_activity(obj_id)
_logger.info(f"inbox_delete handle_replies obj={obj!r}")
in_reply_to = obj.get_in_reply_to() if obj.inReplyTo else None

38
utils/blacklist.py Normal file
View file

@ -0,0 +1,38 @@
import logging
from typing import Any
from typing import Dict
from urllib.parse import urlparse
import config
logger = logging.getLogger(__name__)
def is_url_blacklisted(url: str) -> bool:
    """Return True if *url*'s host (netloc) is in the configured blacklist.

    Best-effort check: any error while parsing the URL is logged and treated
    as "not blacklisted", so a malformed URL never aborts request processing.
    """
    try:
        return urlparse(url).netloc in config.BLACKLIST
    except Exception:
        # Message fix: the failure is in *checking* the blacklist for the URL,
        # not in blacklisting it.
        logger.exception(f"failed to check the blacklist for {url}")
        return False
def is_blacklisted(data: Dict[str, Any]) -> bool:
    """Return True if the activity comes from, or references, a blacklisted host.

    Checks, in order:
      * the activity's own ``id``,
      * the embedded object's ``id`` (when ``object`` is a dict),
      * the object reference itself (when ``object`` is a plain IRI string).
    """
    # Guard-clause form replaces the original single precedence-dependent
    # boolean expression followed by `return True`/`return False`; behavior
    # is identical.
    if "id" in data and is_url_blacklisted(data["id"]):
        return True

    obj = data.get("object")
    if isinstance(obj, dict):
        return "id" in obj and is_url_blacklisted(obj["id"])
    if isinstance(obj, str):
        return is_url_blacklisted(obj)

    return False

View file

@ -1,6 +1,7 @@
import base64
import mimetypes
from enum import Enum
from enum import unique
from gzip import GzipFile
from io import BytesIO
from typing import Any
@ -13,7 +14,7 @@ from little_boxes import activitypub as ap
from PIL import Image
def load(url, user_agent):
def load(url: str, user_agent: str) -> Image:
"""Initializes a `PIL.Image` from the URL."""
with requests.get(url, stream=True, headers={"User-Agent": user_agent}) as resp:
resp.raise_for_status()
@ -22,7 +23,7 @@ def load(url, user_agent):
return Image.open(BytesIO(resp.raw.read()))
def to_data_uri(img):
def to_data_uri(img: Image) -> str:
out = BytesIO()
img.save(out, format=img.format)
out.seek(0)
@ -30,6 +31,7 @@ def to_data_uri(img):
return f"data:{img.get_format_mimetype()};base64,{data}"
@unique
class Kind(Enum):
ATTACHMENT = "attachment"
ACTOR_ICON = "actor_icon"

View file

@ -1,4 +1,7 @@
import logging
from typing import Any
from typing import Dict
from typing import Set
from urllib.parse import urlparse
import opengraph
@ -14,7 +17,7 @@ from .lookup import lookup
logger = logging.getLogger(__name__)
def links_from_note(note):
def links_from_note(note: Dict[str, Any]) -> Set[str]:
note_host = urlparse(ap._get_id(note["id"]) or "").netloc
links = set()

View file

@ -170,7 +170,6 @@ def url_or_id(d):
@filters.app_template_filter()
def get_url(u):
print(f"GET_URL({u!r})")
if isinstance(u, list):
for l in u:
if l.get("mimeType") == "text/html":
@ -191,7 +190,6 @@ def get_actor(url):
url = url[0]
if isinstance(url, dict):
url = url.get("id")
print(f"GET_ACTOR {url}")
try:
return ap.get_backend().fetch_iri(url)
except (ActivityNotFoundError, ActivityGoneError):
@ -203,8 +201,6 @@ def get_actor(url):
@filters.app_template_filter()
def get_answer_count(choice, obj, meta):
count_from_meta = meta.get("question_answers", {}).get(_answer_key(choice), 0)
print(count_from_meta)
print(choice, obj, meta)
if count_from_meta:
return count_from_meta
for option in obj.get("oneOf", obj.get("anyOf", [])):
@ -219,7 +215,6 @@ def get_total_answers_count(obj, meta):
return cached
cnt = 0
for choice in obj.get("anyOf", obj.get("oneOf", [])):
print(choice)
cnt += choice.get("replies", {}).get("totalItems", 0)
return cnt