Fix OG parsing
This commit is contained in:
parent
6d1b342af1
commit
369c380054
1 changed file with 9 additions and 1 deletion
|
@@ -74,7 +74,9 @@ def fetch_og_metadata(user_agent, links):
|
|||
logger.debug(f"failed to HEAD {l}: {err!r}")
|
||||
continue
|
||||
|
||||
if not h.headers.get("content-type").startswith("text/html"):
|
||||
if h.headers.get("content-type") and not h.headers.get(
|
||||
"content-type"
|
||||
).startswith("text/html"):
|
||||
logger.debug(f"skipping {l} for bad content type")
|
||||
continue
|
||||
|
||||
|
@@ -92,6 +94,12 @@ def fetch_og_metadata(user_agent, links):
|
|||
logger.debug(f"failed to GET {l}: {err!r}")
|
||||
continue
|
||||
|
||||
# FIXME(tsileo): check mimetype via the URL too (like we do for images)
|
||||
if not r.headers.get("content-type") or not r.headers.get(
|
||||
"content-type"
|
||||
).startswith("text/html"):
|
||||
continue
|
||||
|
||||
r.encoding = "UTF-8"
|
||||
html = r.text
|
||||
try:
|
||||
|
|
Loading…
Reference in a new issue