From 8aba17f7426181786d80a5e4ac2ee14c6c6c6e28 Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Fri, 5 Jul 2019 10:42:04 +0200 Subject: [PATCH] Improve OpenGraph support --- utils/opengraph.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/utils/opengraph.py b/utils/opengraph.py index 5163b85..d96bd28 100644 --- a/utils/opengraph.py +++ b/utils/opengraph.py @@ -1,4 +1,5 @@ import logging +import urllib import opengraph import requests @@ -59,6 +60,22 @@ def fetch_og_metadata(user_agent, links): except Exception: logger.exception(f"failed to parse {l}") continue + + # Keep track of the fetched URL as some crappy websites use relative URLs everywhere + data["_input_url"] = l + u = urllib.parse.urlparse(l) + + # If it's a relative URL, build the absolute version + if "image" in data and data["image"].startswith("/"): + data["image"] = u._replace( + path=data["image"], params="", query="", fragment="" + ).geturl() + + if "url" in data and data["url"].startswith("/"): + data["url"] = u._replace( + path=data["url"], params="", query="", fragment="" + ).geturl() + if data.get("url"): res.append(data)