Fix OG parsing

This commit is contained in:
Thomas Sileo 2019-08-17 21:32:52 +02:00
parent 6d1b342af1
commit 369c380054

View file

@@ -74,7 +74,9 @@ def fetch_og_metadata(user_agent, links):
logger.debug(f"failed to HEAD {l}: {err!r}")
continue
if not h.headers.get("content-type").startswith("text/html"):
if h.headers.get("content-type") and not h.headers.get(
"content-type"
).startswith("text/html"):
logger.debug(f"skipping {l} for bad content type")
continue
@@ -92,6 +94,12 @@ def fetch_og_metadata(user_agent, links):
logger.debug(f"failed to GET {l}: {err!r}")
continue
# FIXME(tsileo): check mimetype via the URL too (like we do for images)
if not r.headers.get("content-type") or not r.headers.get(
"content-type"
).startswith("text/html"):
continue
r.encoding = "UTF-8"
html = r.text
try: