Add support for exotic TLDs.

Before this patch, TLDs containing digits or dashes (-) were not
supported, which excluded Punycode-encoded ("xn--") TLDs such as xn--p1ai.
This commit is contained in:
funilrys
2023-08-10 16:33:53 +02:00
parent 2415078892
commit 560615dd7f
2 changed files with 11 additions and 3 deletions

View File

@@ -840,9 +840,12 @@ class TestNormalizeRule(BaseStdout):
# Note: "Bare"- Domains are accepted. IP are excluded.
for rule in [
"128.0.0.1",
"::1",
"0.0.0.0 128.0.0.2",
"0.0.0 google",
"0.1.2.3.4 foo/bar",
"0.0.0.0 https",
"0.0.0.0 https..",
]:
self.assertEqual(normalize_rule(rule, **kwargs), (None, None))
@@ -905,7 +908,7 @@ class TestNormalizeRule(BaseStdout):
sys.stdout = StringIO()
def test_no_comment_raw(self):
for rule in ("twitter.com", "google.com", "foo.bar.edu", "www.example-foo.bar.edu", "www.example-3045.foobar.com"):
for rule in ("twitter.com", "google.com", "foo.bar.edu", "www.example-foo.bar.edu", "www.example-3045.foobar.com", "www.example.xn--p1ai"):
expected = (rule, "0.0.0.0 " + rule + "\n")
actual = normalize_rule(

View File

@@ -1061,7 +1061,10 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
"""
first try: IP followed by domain
"""
regex = r"^\s*(\d{1,3}\.){3}\d{1,3}\s+((?:\w+\.)+[a-zA-Z\.-]+)(.*)"
# WARNING:
# Matching the TLD with [a-zA-Z0-9\-]+ is intentional, NOT an issue: Punycode-encoded TLDs (e.g., xn--p1ai) contain digits and dashes.
regex = r"^\s*(\d{1,3}\.){3}\d{1,3}\s+((?:[\w\-\.]+\.)+[a-zA-Z0-9\-]+)(.*)"
result = re.search(regex, rule)
if result:
@@ -1090,7 +1093,9 @@ def normalize_rule(rule, target_ip, keep_domain_comments):
"""
# deny any potential IPv6 address here.
if ":" not in rule:
regex = r"^\s*((?:\w+\.)+[a-zA-Z\.-]+)(.*)"
# WARNING:
# Matching the TLD with [a-zA-Z0-9\-]+ is intentional, NOT an issue: Punycode-encoded TLDs (e.g., xn--p1ai) contain digits and dashes.
regex = r"^\s*((?:[\w\-\.]+\.)+[a-zA-Z0-9\-]+)(.*)"
result = re.search(regex, rule)
if result: