Introduce the requests library.

Replace the combination of urllib, BeautifulSoup, and lxml with the requests library.
This commit is contained in:
Alexander Cecile
2020-08-17 19:52:11 -04:00
parent 5340f9adfe
commit 5186071948
3 changed files with 5 additions and 83 deletions

View File

@@ -1,3 +1 @@
lxml>=4.2.4,<=5.0 requests
beautifulsoup4>=4.6.1,<=5.0
flake8>=3.8,<=4.0

View File

@@ -1615,47 +1615,6 @@ class DomainToIDNA(Base):
self.assertEqual(actual, expected) self.assertEqual(actual, expected)
class GetFileByUrl(BaseStdout):
    """Tests for ``get_file_by_url`` with ``updateHostsFile.urlopen`` patched.

    Each test swaps ``urlopen`` for one of the ``mock_url_open*`` helpers
    (defined elsewhere in this test module) through ``mock.patch``.
    """

    def _assert_fetch_reports_problem(self):
        """Assert a fetch returns None and prints the problem notice.

        Reads ``sys.stdout.getvalue()``, so it presumably relies on
        ``BaseStdout`` having replaced ``sys.stdout`` with an in-memory
        stream -- TODO confirm against the base class.
        """
        target = b"www.google.com"
        self.assertIsNone(get_file_by_url(target, delay=0))

        notice = "Problem getting file:"
        printed = sys.stdout.getvalue()
        self.assertIn(notice, printed)

    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open)
    def test_read_url(self, _):
        """With ``mock_url_open`` in place the fetched text is returned."""
        target = b"www.google.com"
        fetched = get_file_by_url(target, delay=0)
        self.assertEqual(fetched, "www.google.com")

    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open_fail)
    def test_read_url_fail(self, _):
        """With ``mock_url_open_fail`` in place the fetch reports a problem."""
        self._assert_fetch_reports_problem()

    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open_read_fail)
    def test_read_url_read_fail(self, _):
        """With ``mock_url_open_read_fail`` the fetch reports a problem."""
        self._assert_fetch_reports_problem()

    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open_decode_fail)
    def test_read_url_decode_fail(self, _):
        """With ``mock_url_open_decode_fail`` the fetch reports a problem."""
        self._assert_fetch_reports_problem()
class TestWriteData(Base): class TestWriteData(Base):
def test_write_basic(self): def test_write_basic(self):
f = BytesIO() f = BytesIO()

View File

@@ -21,15 +21,12 @@ import tempfile
import time import time
from glob import glob from glob import glob
import lxml # noqa: F401 import requests
from bs4 import BeautifulSoup
# Detecting Python 3 for version-dependent implementations # Detecting Python 3 for version-dependent implementations
PY3 = sys.version_info >= (3, 0) PY3 = sys.version_info >= (3, 0)
if PY3: if not PY3:
from urllib.request import urlopen
else:
raise Exception("We do not support Python 2 anymore.") raise Exception("We do not support Python 2 anymore.")
# Syntactic sugar for "sudo" command in UNIX / Linux # Syntactic sugar for "sudo" command in UNIX / Linux
@@ -1469,40 +1466,8 @@ def maybe_copy_example_file(file_path):
shutil.copyfile(example_file_path, file_path) shutil.copyfile(example_file_path, file_path)
def get_file_by_url(url, params=None, retries=3, delay=10, **kwargs):
    """
    Get a file data located at a particular URL.

    The download is performed with ``requests.get``. Connection-level
    failures are retried; any other request failure, including an HTTP
    error status, aborts immediately.

    Parameters
    ----------
    url : str
        The URL at which to access the data.
    params : dict, optional
        Query-string parameters forwarded to ``requests.get``.
    retries : int, default 3
        Maximum number of attempts made when the connection itself fails.
    delay : int or float, default 10
        Seconds to wait between two attempts.
    **kwargs
        Extra keyword arguments forwarded unchanged to ``requests.get``
        (for example ``timeout`` or ``headers``).

    Returns
    -------
    url_data : str or None
        The data retrieved at that URL from the file, with every line passed
        through ``domain_to_idna``. Returns None if the attempted retrieval
        is unsuccessful.

    Note
    ----
    - ``params`` is optional and ``retries`` / ``delay`` are still accepted,
      so both ``get_file_by_url(url, delay=0)`` and
      ``get_file_by_url(url, params, **kwargs)`` keep working.
    - ``requests`` sets no timeout by default; pass ``timeout=...`` to avoid
      waiting indefinitely on an unresponsive server.
    """

    attempts_left = retries

    while attempts_left > 0:
        try:
            response = requests.get(url=url, params=params, **kwargs)
            # Treat 4xx/5xx as a failure rather than returning the error
            # page body as if it were the requested file.
            response.raise_for_status()
        except requests.exceptions.ConnectionError:
            attempts_left -= 1
            if attempts_left > 0:
                print(
                    "No internet connection! Retrying in {} seconds".format(delay)
                )
                time.sleep(delay)
            continue
        except requests.exceptions.RequestException:
            # Invalid URL, HTTP error status, timeout, ...: retrying will
            # not help, so report the problem below.
            break

        # Without a declared charset, requests falls back to ISO-8859-1 for
        # text/* responses; use the detected encoding instead.
        if "charset" not in response.headers.get("Content-Type", "").lower():
            response.encoding = response.apparent_encoding

        return "\n".join(map(domain_to_idna, response.text.split("\n")))

    print("Problem getting file: ", url)
    return None
def write_data(f, data): def write_data(f, data):