Unification of the sorting of the sources.

Before this patch, there was no proper sorting of the sources.

As @XhmikosR mentioned in StevenBlack/hosts#1166, without this patch,
the output is totally different in Windows. But let's be honest, if it
is like that under Windows, chances are that the same behavior happens
across different OSes or machines around the globe.

Another reason behind this patch is that - despite the fact that we
certainly trust @StevenBlack - the integrity of the generated files
could not be guaranteed because of the sorting, which may be completely
different from one OS to another and from one machine to another.

For those reasons, this patch introduces a unification of the sorting of
all sources.

The idea behind this patch is to have @StevenBlack's ad-hoc hosts
file always on top (1st) and the rest of the sources sorted
alphabetically based on the name of the folder inside the `data`
or `extensions` directory.

This will ensure that we get the same result everywhere.

Concretely speaking, I just added a function (`sort_sources`) which
sorts a given list of source files, and then called the new function
everywhere it was necessary. Tests of the newly introduced function
are also included.

Contributors:
  * @ScriptTiger
  * @XhmikosR

Notes:
  * This patch fixes (completely?) ScriptTiger/hosts#1
  * This patch fixes https://github.com/StevenBlack/hosts/issues/1166#issuecomment-590511086
This commit is contained in:
funilrys
2020-04-21 22:37:37 +02:00
parent 6548b1af25
commit 665dc98191
2 changed files with 110 additions and 3 deletions

View File

@@ -41,6 +41,7 @@ from updateHostsFile import (
query_yes_no,
recursive_glob,
remove_old_hosts_file,
sort_sources,
strip_rule,
supports_color,
update_all_sources,
@@ -131,6 +132,80 @@ class TestGetDefaults(Base):
# End Project Settings
class TestSortSources(Base):
    """Checks of the ordering produced by ``sort_sources``."""

    def test_sort_sources_simple(self):
        """Plain names, none of them Steven Black's, come back alphabetically."""
        unordered = [
            "sbc.io",
            "example.com",
            "github.com",
        ]
        ordered = ["example.com", "github.com", "sbc.io"]

        self.assertEqual(sort_sources(unordered), ordered)

    def test_live_data(self):
        """
        A shuffled list of ``update.json`` paths comes back with the
        StevenBlack entry first and the remaining paths in
        case-insensitive alphabetical order.
        """
        unordered = [
            "data/KADhosts/update.json",
            "data/someonewhocares.org/update.json",
            "data/StevenBlack/update.json",
            "data/adaway.org/update.json",
            "data/URLHaus/update.json",
            "data/UncheckyAds/update.json",
            "data/add.2o7Net/update.json",
            "data/mvps.org/update.json",
            "data/add.Spam/update.json",
            "data/add.Dead/update.json",
            "data/malwaredomainlist.com/update.json",
            "data/Badd-Boyz-Hosts/update.json",
            "data/hostsVN/update.json",
            "data/yoyo.org/update.json",
            "data/add.Risk/update.json",
            "data/tiuxo/update.json",
            "extensions/gambling/update.json",
            "extensions/porn/clefspeare13/update.json",
            "extensions/porn/sinfonietta-snuff/update.json",
            "extensions/porn/tiuxo/update.json",
            "extensions/porn/sinfonietta/update.json",
            "extensions/fakenews/update.json",
            "extensions/social/tiuxo/update.json",
            "extensions/social/sinfonietta/update.json",
        ]
        ordered = [
            "data/StevenBlack/update.json",
            "data/adaway.org/update.json",
            "data/add.2o7Net/update.json",
            "data/add.Dead/update.json",
            "data/add.Risk/update.json",
            "data/add.Spam/update.json",
            "data/Badd-Boyz-Hosts/update.json",
            "data/hostsVN/update.json",
            "data/KADhosts/update.json",
            "data/malwaredomainlist.com/update.json",
            "data/mvps.org/update.json",
            "data/someonewhocares.org/update.json",
            "data/tiuxo/update.json",
            "data/UncheckyAds/update.json",
            "data/URLHaus/update.json",
            "data/yoyo.org/update.json",
            "extensions/fakenews/update.json",
            "extensions/gambling/update.json",
            "extensions/porn/clefspeare13/update.json",
            "extensions/porn/sinfonietta/update.json",
            "extensions/porn/sinfonietta-snuff/update.json",
            "extensions/porn/tiuxo/update.json",
            "extensions/social/sinfonietta/update.json",
            "extensions/social/tiuxo/update.json",
        ]

        self.assertEqual(sort_sources(unordered), ordered)
# Prompt the User
class TestPromptForUpdate(BaseStdout, BaseMockDir):
def setUp(self):

View File

@@ -474,6 +474,34 @@ def prompt_for_move(final_file, **move_params):
# End Prompt the User
def sort_sources(sources):
    """
    Sorts the sources.

    The idea is that all Steven Black's list, file or entries
    get on top and the rest sorted alphabetically.

    The alphabetical comparison is case-insensitive and ignores dashes,
    underscores and spaces. An entry is treated as Steven Black's when
    its lowercased text contains "stevenblack"; when several entries
    match, they keep their alphabetical order among themselves.

    Parameters
    ----------
    sources: list
        The sources to sort. Any iterable of strings is accepted and
        the given object is left unmodified.

    Returns
    -------
    list
        A new list holding the sorted sources.
    """

    # Characters that are dropped (in a single pass) before comparing.
    ignored = str.maketrans("", "", "-_ ")

    ordered = sorted(sources, key=lambda source: source.lower().translate(ignored))

    # Steven Black's repositories/files/lists should be on top!
    # A stable partition is used instead of repeated pop/insert, which
    # reversed the matching entries whenever there was more than one.
    on_top = []
    others = []
    for source in ordered:
        if "stevenblack" in source.lower():
            on_top.append(source)
        else:
            others.append(source)

    return on_top + others
# Exclusion logic
def display_exclusion_options(common_exclusions, exclusion_pattern, exclusion_regexes):
"""
@@ -641,7 +669,9 @@ def update_sources_data(sources_data, **sources_params):
source_data_filename = sources_params["sourcedatafilename"]
for source in recursive_glob(sources_params["datapath"], source_data_filename):
for source in sort_sources(
recursive_glob(sources_params["datapath"], source_data_filename)
):
update_file = open(source, "r", encoding="UTF-8")
update_data = json.load(update_file)
sources_data.append(update_data)
@@ -649,7 +679,9 @@ def update_sources_data(sources_data, **sources_params):
for source in sources_params["extensions"]:
source_dir = path_join_robust(sources_params["extensionspath"], source)
for update_file_path in recursive_glob(source_dir, source_data_filename):
for update_file_path in sort_sources(
recursive_glob(source_dir, source_data_filename)
):
update_file = open(update_file_path, "r")
update_data = json.load(update_file)
@@ -695,7 +727,7 @@ def update_all_sources(source_data_filename, host_filename):
# The transforms we support
transform_methods = {"jsonarray": jsonarray}
all_sources = recursive_glob("*", source_data_filename)
all_sources = sort_sources(recursive_glob("*", source_data_filename))
for source in all_sources:
update_file = open(source, "r", encoding="UTF-8")