More opengraph tweaks

This commit is contained in:
Thomas Sileo 2018-08-05 14:24:52 +02:00
parent b43fa4556e
commit c585f07857

View file

@@ -1,3 +1,4 @@
import logging
import opengraph import opengraph
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
@@ -8,6 +9,8 @@ from little_boxes.urlutils import is_url_valid
from .lookup import lookup from .lookup import lookup
logger = logging.getLogger(__name__)
def links_from_note(note): def links_from_note(note):
tags_href = set() tags_href = set()
@@ -27,7 +30,7 @@ def links_from_note(note):
def fetch_og_metadata(user_agent, links): def fetch_og_metadata(user_agent, links):
htmls = [] res = []
for l in links: for l in links:
check_url(l) check_url(l)
@@ -41,11 +44,13 @@ def fetch_og_metadata(user_agent, links):
r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15) r = requests.get(l, headers={"User-Agent": user_agent}, timeout=15)
r.raise_for_status() r.raise_for_status()
htmls.append(r.text)
res = [] html = r.text
for html in htmls: try:
data = dict(opengraph.OpenGraph(html=html)) data = dict(opengraph.OpenGraph(html=html))
except Exception:
logger.exception("failed to parse {l}")
continue
if data.get("url"): if data.get("url"):
res.append(data) res.append(data)