More URL checking
This commit is contained in:
parent
06f4f824d8
commit
a3267971e8
3 changed files with 10 additions and 1 deletion
|
@ -4,6 +4,8 @@ import requests
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
from Crypto.PublicKey import RSA
|
from Crypto.PublicKey import RSA
|
||||||
|
|
||||||
|
from .urlutils import check_url
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@ -23,6 +25,9 @@ class ActorService(object):
|
||||||
|
|
||||||
def _fetch(self, actor_url):
|
def _fetch(self, actor_url):
|
||||||
logger.debug(f'fetching remote object {actor_url}')
|
logger.debug(f'fetching remote object {actor_url}')
|
||||||
|
|
||||||
|
check_url(actor_url)
|
||||||
|
|
||||||
resp = requests.get(actor_url, headers={
|
resp = requests.get(actor_url, headers={
|
||||||
'Accept': 'application/activity+json',
|
'Accept': 'application/activity+json',
|
||||||
'User-Agent': self._user_agent,
|
'User-Agent': self._user_agent,
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
import requests
|
import requests
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from .urlutils import check_url
|
||||||
|
|
||||||
|
|
||||||
class ObjectService(object):
|
class ObjectService(object):
|
||||||
def __init__(self, user_agent, col, inbox, outbox, instances):
|
def __init__(self, user_agent, col, inbox, outbox, instances):
|
||||||
|
@ -13,6 +15,7 @@ class ObjectService(object):
|
||||||
|
|
||||||
def _fetch_remote(self, object_id):
|
def _fetch_remote(self, object_id):
|
||||||
print(f'fetch remote {object_id}')
|
print(f'fetch remote {object_id}')
|
||||||
|
check_url(object_id)
|
||||||
resp = requests.get(object_id, headers={
|
resp = requests.get(object_id, headers={
|
||||||
'Accept': 'application/activity+json',
|
'Accept': 'application/activity+json',
|
||||||
'User-Agent': self._user_agent,
|
'User-Agent': self._user_agent,
|
||||||
|
|
|
@ -5,7 +5,7 @@ import opengraph
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from .urlutils import is_url_valid
|
from .urlutils import is_url_valid, check_url
|
||||||
|
|
||||||
|
|
||||||
def links_from_note(note):
|
def links_from_note(note):
|
||||||
|
@ -38,6 +38,7 @@ def fetch_og_metadata(user_agent, col, remote_id):
|
||||||
# FIXME(tsileo): set the user agent by giving HTML directly to OpenGraph
|
# FIXME(tsileo): set the user agent by giving HTML directly to OpenGraph
|
||||||
htmls = []
|
htmls = []
|
||||||
for l in links:
|
for l in links:
|
||||||
|
check_url(l)
|
||||||
r = requests.get(l, headers={'User-Agent': user_agent})
|
r = requests.get(l, headers={'User-Agent': user_agent})
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
htmls.append(r.text)
|
htmls.append(r.text)
|
||||||
|
|
Loading…
Reference in a new issue