More URL checking

This commit is contained in:
Thomas Sileo 2018-05-26 00:03:30 +02:00
parent 06f4f824d8
commit a3267971e8
3 changed files with 10 additions and 1 deletion

View file

@@ -4,6 +4,8 @@ import requests
from urllib.parse import urlparse from urllib.parse import urlparse
from Crypto.PublicKey import RSA from Crypto.PublicKey import RSA
from .urlutils import check_url
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@@ -23,6 +25,9 @@ class ActorService(object):
def _fetch(self, actor_url): def _fetch(self, actor_url):
logger.debug(f'fetching remote object {actor_url}') logger.debug(f'fetching remote object {actor_url}')
check_url(actor_url)
resp = requests.get(actor_url, headers={ resp = requests.get(actor_url, headers={
'Accept': 'application/activity+json', 'Accept': 'application/activity+json',
'User-Agent': self._user_agent, 'User-Agent': self._user_agent,

View file

@@ -1,6 +1,8 @@
import requests import requests
from urllib.parse import urlparse from urllib.parse import urlparse
from .urlutils import check_url
class ObjectService(object): class ObjectService(object):
def __init__(self, user_agent, col, inbox, outbox, instances): def __init__(self, user_agent, col, inbox, outbox, instances):
@@ -13,6 +15,7 @@ class ObjectService(object):
def _fetch_remote(self, object_id): def _fetch_remote(self, object_id):
print(f'fetch remote {object_id}') print(f'fetch remote {object_id}')
check_url(object_id)
resp = requests.get(object_id, headers={ resp = requests.get(object_id, headers={
'Accept': 'application/activity+json', 'Accept': 'application/activity+json',
'User-Agent': self._user_agent, 'User-Agent': self._user_agent,

View file

@@ -5,7 +5,7 @@ import opengraph
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
-from .urlutils import is_url_valid
+from .urlutils import is_url_valid, check_url
def links_from_note(note): def links_from_note(note):
@@ -38,6 +38,7 @@ def fetch_og_metadata(user_agent, col, remote_id):
# FIXME(tsileo): set the user agent by giving HTML directly to OpenGraph # FIXME(tsileo): set the user agent by giving HTML directly to OpenGraph
htmls = [] htmls = []
for l in links: for l in links:
check_url(l)
r = requests.get(l, headers={'User-Agent': user_agent}) r = requests.get(l, headers={'User-Agent': user_agent})
r.raise_for_status() r.raise_for_status()
htmls.append(r.text) htmls.append(r.text)