Exclude domains contained in a whitelist

This commit is contained in:
FrancoGag
2015-11-05 10:44:42 -03:00
parent ad5605c4c0
commit 461b06710c

View File

@@ -77,10 +77,18 @@ SOURCES = listdir_nohidden(DATA_PATH)
README_TEMPLATE = os.path.join(BASEDIR_PATH, 'readme_template.md')
README_FILE = os.path.join(BASEDIR_PATH, 'readme.md')
TARGET_HOST = '0.0.0.0'
# Optional file listing domains to exclude, read one entry per line below.
WHITELIST_FILE = os.path.join(BASEDIR_PATH, 'whitelist')

# Exclusions
# Regex prefix matching zero or more leading "label." parts; the domain to
# exclude is appended to the end of this pattern. Raw string so "\d" and
# "\." stay regex escapes instead of invalid string escapes.
EXCLUSION_PATTERN = r'([a-zA-Z\d-]+\.){0,}'
# Exclusions from the whitelist file
EXCLUSIONS = []
if os.path.isfile(WHITELIST_FILE):
    with open(WHITELIST_FILE, "r") as ins:
        # Iterating a file yields each line with its line ending attached;
        # strip it so blank lines become '' and no newline ends up inside
        # a whitelisted domain.
        EXCLUSIONS.extend(line.strip() for line in ins)
# Common domains to exclude
COMMON_EXCLUSIONS = ['hulu.com']
@@ -90,6 +98,7 @@ numberOfRules = 0
def main():
promptForUpdate()
excludeFromFile()
promptForExclusions()
mergeFile = createInitialFile()
finalFile = removeDups(mergeFile)
@@ -99,6 +108,14 @@ def main():
promptForMove(finalFile)
# Exclusion from file
def excludeFromFile():
    """Pass every usable entry of EXCLUSIONS to excludeDomain().

    An entry is skipped when, after surrounding whitespace is removed, it is
    empty, starts with "#", or starts with either "www" + up to three
    digits + "." or "http" (optionally followed by "s").
    """
    # Compiled once outside the loop; raw string keeps "\d" a regex escape.
    # NOTE: match() anchors at the start only, so any entry that merely
    # begins with "http" is skipped as well.
    skipPrefix = re.compile(r"www\d{0,3}[.]|https?")
    for entry in EXCLUSIONS:
        # Entries taken straight from the whitelist file may still carry a
        # line ending; strip it so blank lines are really empty and the
        # newline never reaches excludeDomain().
        domain = entry.strip()
        if not domain or domain.startswith("#"):
            continue
        if skipPrefix.match(domain):
            continue
        excludeDomain(domain)
# Prompt the User
def promptForUpdate():
response = query_yes_no("Do you want to update all data sources?")