From a3267971e828d7d018877366bd9299db399e1267 Mon Sep 17 00:00:00 2001 From: Thomas Sileo Date: Sat, 26 May 2018 00:03:30 +0200 Subject: [PATCH] More url checking --- utils/actor_service.py | 5 +++++ utils/object_service.py | 3 +++ utils/opengraph.py | 3 ++- 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/utils/actor_service.py b/utils/actor_service.py index af7acc8..6c1f35a 100644 --- a/utils/actor_service.py +++ b/utils/actor_service.py @@ -4,6 +4,8 @@ import requests from urllib.parse import urlparse from Crypto.PublicKey import RSA +from .urlutils import check_url + logger = logging.getLogger(__name__) @@ -23,6 +25,9 @@ class ActorService(object): def _fetch(self, actor_url): logger.debug(f'fetching remote object {actor_url}') + + check_url(actor_url) + resp = requests.get(actor_url, headers={ 'Accept': 'application/activity+json', 'User-Agent': self._user_agent, diff --git a/utils/object_service.py b/utils/object_service.py index 185488f..9445550 100644 --- a/utils/object_service.py +++ b/utils/object_service.py @@ -1,6 +1,8 @@ import requests from urllib.parse import urlparse +from .urlutils import check_url + class ObjectService(object): def __init__(self, user_agent, col, inbox, outbox, instances): @@ -13,6 +15,7 @@ class ObjectService(object): def _fetch_remote(self, object_id): print(f'fetch remote {object_id}') + check_url(object_id) resp = requests.get(object_id, headers={ 'Accept': 'application/activity+json', 'User-Agent': self._user_agent, diff --git a/utils/opengraph.py b/utils/opengraph.py index e67a5a8..a53c07b 100644 --- a/utils/opengraph.py +++ b/utils/opengraph.py @@ -5,7 +5,7 @@ import opengraph import requests from bs4 import BeautifulSoup -from .urlutils import is_url_valid +from .urlutils import is_url_valid, check_url def links_from_note(note): @@ -38,6 +38,7 @@ def fetch_og_metadata(user_agent, col, remote_id): # FIXME(tsileo): set the user agent by giving HTML directly to OpenGraph htmls = [] for l in links: + check_url(l) r = requests.get(l, headers={'User-Agent': user_agent}) r.raise_for_status() htmls.append(r.text)