From c585f0785726fd01d758f36f826169f6a0946019 Mon Sep 17 00:00:00 2001
From: Thomas Sileo
Date: Sun, 5 Aug 2018 14:24:52 +0200
Subject: [PATCH] More opengraph tweaks

Parse the OpenGraph metadata of each link right after fetching it,
instead of collecting every HTML body first and parsing in a second
loop. A page whose metadata cannot be parsed is now logged (with
traceback) through a new module-level logger and skipped, so one bad
page no longer aborts the whole batch.

The URL is passed to the logger as a lazy %-style argument so the
failing link actually shows up in the log message.
---
 utils/opengraph.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/utils/opengraph.py b/utils/opengraph.py
index 30c752b..5401131 100644
--- a/utils/opengraph.py
+++ b/utils/opengraph.py
@@ -1,3 +1,4 @@
+import logging
 import opengraph
 import requests
 from bs4 import BeautifulSoup
@@ -8,6 +9,8 @@ from little_boxes.urlutils import is_url_valid
 
 from .lookup import lookup
 
+logger = logging.getLogger(__name__)
+
 
 def links_from_note(note):
     tags_href = set()
@@ -27,7 +30,7 @@ def links_from_note(note):
 
 
 def fetch_og_metadata(user_agent, links):
-    htmls = []
+    res = []
     for l in links:
         check_url(l)
 
@@ -41,11 +44,13 @@ def fetch_og_metadata(user_agent, links):
 
         r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15)
         r.raise_for_status()
-        htmls.append(r.text)
 
-    res = []
-    for html in htmls:
-        data = dict(opengraph.OpenGraph(html=html))
+        html = r.text
+        try:
+            data = dict(opengraph.OpenGraph(html=html))
+        except Exception:
+            logger.exception("failed to parse %s", l)
+            continue
         if data.get("url"):
             res.append(data)
 