Fix OG (OpenGraph) parsing: handle a missing Content-Type header and skip non-HTML responses

2019-08-17 21:32:52 +02:00 · 2019-08-17 21:32:52 +02:00 · 369c380054
commit 369c380054
parent 6d1b342af1
1 changed file with 9 additions and 1 deletion
--- a/utils/opengraph.py
+++ b/utils/opengraph.py
@@ -74,7 +74,9 @@ def fetch_og_metadata(user_agent, links):
            logger.debug(f"failed to HEAD {l}: {err!r}")
            continue

-        if not h.headers.get("content-type").startswith("text/html"):
+        if h.headers.get("content-type") and not h.headers.get(
+            "content-type"
+        ).startswith("text/html"):
            logger.debug(f"skipping {l} for bad content type")
            continue

@@ -92,6 +94,12 @@ def fetch_og_metadata(user_agent, links):
            logger.debug(f"failed to GET {l}: {err!r}")
            continue

+        # FIXME(tsileo): check mimetype via the URL too (like we do for images)
+        if not r.headers.get("content-type") or not r.headers.get(
+            "content-type"
+        ).startswith("text/html"):
+            continue
+
        r.encoding = "UTF-8"
        html = r.text
        try: