Introduce the requests library.

Replace the combination of urllib, BeautifulSoup and lxml with the requests library.
2026-07-01 10:46:51 +00:00 · 2020-08-17 19:52:11 -04:00
parent 5340f9adfe
commit 5186071948
3 changed files with 5 additions and 83 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1 @@
-lxml>=4.2.4,<=5.0
+requests
-beautifulsoup4>=4.6.1,<=5.0
-flake8>=3.8,<=4.0
--- a/testUpdateHostsFile.py
+++ b/testUpdateHostsFile.py
@@ -1615,47 +1615,6 @@ class DomainToIDNA(Base):
            self.assertEqual(actual, expected)
-class GetFileByUrl(BaseStdout):
-    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open)
-    def test_read_url(self, _):
-        url = b"www.google.com"
-        expected = "www.google.com"
-        actual = get_file_by_url(url, delay=0)
-        self.assertEqual(actual, expected)
-    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open_fail)
-    def test_read_url_fail(self, _):
-        url = b"www.google.com"
-        self.assertIsNone(get_file_by_url(url, delay=0))
-        expected = "Problem getting file:"
-        output = sys.stdout.getvalue()
-        self.assertIn(expected, output)
-    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open_read_fail)
-    def test_read_url_read_fail(self, _):
-        url = b"www.google.com"
-        self.assertIsNone(get_file_by_url(url, delay=0))
-        expected = "Problem getting file:"
-        output = sys.stdout.getvalue()
-        self.assertIn(expected, output)
-    @mock.patch("updateHostsFile.urlopen", side_effect=mock_url_open_decode_fail)
-    def test_read_url_decode_fail(self, _):
-        url = b"www.google.com"
-        self.assertIsNone(get_file_by_url(url, delay=0))
-        expected = "Problem getting file:"
-        output = sys.stdout.getvalue()
-        self.assertIn(expected, output)
 class TestWriteData(Base):
    def test_write_basic(self):
        f = BytesIO()
--- a/updateHostsFile.py
+++ b/updateHostsFile.py
@@ -21,15 +21,12 @@ import tempfile
 import time
 from glob import glob
-import lxml  # noqa: F401
+import requests
-from bs4 import BeautifulSoup
 # Detecting Python 3 for version-dependent implementations
 PY3 = sys.version_info >= (3, 0)
-if PY3:
+if not PY3:
-    from urllib.request import urlopen
-else:
    raise Exception("We do not support Python 2 anymore.")
 # Syntactic sugar for "sudo" command in UNIX / Linux
@@ -1469,40 +1466,8 @@ def maybe_copy_example_file(file_path):
            shutil.copyfile(example_file_path, file_path)
-def get_file_by_url(url, retries=3, delay=10):
+def get_file_by_url(url, params, **kwargs):
-    """
+    return requests.get(url=url, params=params, **kwargs).text
-    Get a file data located at a particular URL.
-    Parameters
-    ----------
-    url : str
-        The URL at which to access the data.
-    Returns
-    -------
-    url_data : str or None
-        The data retrieved at that URL from the file. Returns None if the
-        attempted retrieval is unsuccessful.
-    Note
-    ----
-    - BeautifulSoup is used in this case to avoid having to search in which
-        format we have to encode or decode data before parsing it to UTF-8.
-    """
-    while retries:
-        try:
-            with urlopen(url) as f:
-                soup = BeautifulSoup(f.read(), "lxml").get_text()
-                return "\n".join(list(map(domain_to_idna, soup.split("\n"))))
-        except Exception as e:
-            if 'failure in name resolution' in str(e):
-                print('No internet connection! Retrying in {} seconds'.format(delay))
-                time.sleep(delay)
-                retries -= 1
-                continue
-            break
-    print("Problem getting file: ", url)
 def write_data(f, data):