Improve OpenGraph support

This commit is contained in:
Thomas Sileo 2019-07-05 10:42:04 +02:00
parent abbcebb81d
commit 8aba17f742

View file

@ -1,4 +1,5 @@
import logging import logging
import urllib
import opengraph import opengraph
import requests import requests
@ -59,6 +60,22 @@ def fetch_og_metadata(user_agent, links):
except Exception: except Exception:
logger.exception(f"failed to parse {l}") logger.exception(f"failed to parse {l}")
continue continue
# Keep track of the fetched URL as some crappy websites use relative URLs everywhere
data["_input_url"] = l
u = urllib.parse.urlparse(l)
# If it's a relative URL, build the absolute version
if "image" in data and data["image"].startswith("/"):
data["image"] = u._replace(
path=data["image"], params="", query="", fragment=""
).geturl()
if "url" in data and data["url"].startswith("/"):
data["url"] = u._replace(
path=data["url"], params="", query="", fragment=""
).geturl()
if data.get("url"): if data.get("url"):
res.append(data) res.append(data)