Global ectomy.

This commit is contained in:
Steven Black
2016-03-24 01:10:13 -04:00
parent 7fd9e5ab2e
commit 5f7c9cbd1c

View File

@@ -74,59 +74,62 @@ def listdir_nohidden(path):
# Project Settings
BASEDIR_PATH = os.path.dirname(os.path.realpath(__file__))
DATA_PATH = os.path.join(BASEDIR_PATH, 'data')
EXTENSIONS_PATH = os.path.join(BASEDIR_PATH, 'extensions')
DATA_FILENAMES = 'hosts'
UPDATE_URL_FILENAME = 'update.info'
SOURCES = listdir_nohidden(DATA_PATH)
EXTENSIONS = listdir_nohidden(EXTENSIONS_PATH)
README_TEMPLATE = os.path.join(BASEDIR_PATH, 'readme_template.md')
README_FILENAME = 'readme.md'
WHITELIST_FILE = os.path.join(BASEDIR_PATH, 'whitelist')
README_DATA_FILENAME = "readmeData.json"
# Exclusions
EXCLUSION_PATTERN = '([a-zA-Z\d-]+\.){0,}' #append domain the end
EXCLUSIONS = []
# Common domains to exclude
COMMON_EXCLUSIONS = ['hulu.com']
defaults = {
"numberofrules" : 0,
"datapath" : os.path.join(BASEDIR_PATH, "data"),
"freshen" : True,
"replace" : False,
"extensionspath" : os.path.join(BASEDIR_PATH, "extensions"),
"extensions" : [],
"outputsubfolder" : "",
"datafilenames" : "hosts",
"targetip" : "0.0.0.0",
"updateurlfilename" : "update.info",
"readmefilename" : "readme.md",
"readmetemplate" : os.path.join(BASEDIR_PATH, "readme_template.md"),
"readmedata" : {},
"readmedatafilename" : "readmeData.json",
"exclusionpattern" : "([a-zA-Z\d-]+\.){0,}",
"exclusionregexs" : [],
"exclusions" : [],
"commonexclusions" : ["hulu.com"],
"whitelistfile" : os.path.join(BASEDIR_PATH, "whitelist")}
# Global vars
outputPath = BASEDIR_PATH
exclusionRegexs = []
numberOfRules = 0
auto = False
update = True
replace = False
targetIP = "0.0.0.0"
extensions = []
options = {}
settings = {}
def main():
parser = argparse.ArgumentParser(description="Creates a unified hosts file from hosts stored in data subfolders.")
parser.add_argument("--auto", "-a", dest="auto", default=False, action='store_true', help="Run without prompting.")
parser.add_argument("--replace", "-r", dest="replace", default=False, action='store_true', help="Replace your active hosts file with this new hosts file.")
parser.add_argument("--ip", "-i", dest="targetIP", default="0.0.0.0", help="Target IP address. Default is 0.0.0.0.")
parser.add_argument("--ip", "-i", dest="targetip", default="0.0.0.0", help="Target IP address. Default is 0.0.0.0.")
parser.add_argument("--extensions", "-e", dest="extensions", default=[], nargs='*', help="Host extensions to include in the final hosts file.")
parser.add_argument("--output", "-o", dest="outputSubFolder", default="", help="Output subfolder for generated hosts file.")
parser.add_argument("--noupdate", "-n", dest="noUpdate", default=False, action='store_true', help="Don't update from host data sources.")
parser.add_argument("--output", "-o", dest="outputsubfolder", default="", help="Output subfolder for generated hosts file.")
parser.add_argument("--noupdate", "-n", dest="noupdate", default=False, action='store_true', help="Don't update from host data sources.")
args = parser.parse_args()
global defaults, options, settings
options = vars(parser.parse_args())
options["outputpath"] = os.path.join(BASEDIR_PATH, options["outputsubfolder"])
options["freshen"] = not options["noupdate"]
settings = {}
settings.update(defaults)
settings.update(options)
settings["sources"] = listdir_nohidden(settings["datapath"])
global auto, update, replace, targetIP, replace, extensions, outputPath, readmeData
auto = args.auto
replace = args.replace
targetIP = args.targetIP
outputPath = os.path.join(BASEDIR_PATH, args.outputSubFolder)
update = not args.noUpdate
# All our extensions folders...
extensions = [os.path.basename(item) for item in listdir_nohidden(EXTENSIONS_PATH)]
settings["extensions"] = [os.path.basename(item) for item in listdir_nohidden(settings["extensionspath"])]
# ... intersected with the extensions passed-in as arguments, then sorted.
extensions = sorted( list(set(args.extensions).intersection(extensions)) )
settings["extensions"] = sorted( list(set(options["extensions"]).intersection(settings["extensions"])) )
with open(README_DATA_FILENAME, 'r') as f:
readmeData = json.load(f)
with open(settings["readmedatafilename"], 'r') as f:
settings["readmedata"] = json.load(f)
promptForUpdate()
promptForExclusions()
@@ -134,9 +137,9 @@ def main():
removeOldHostsFile()
finalFile = removeDupsAndExcl(mergeFile)
finalizeFile(finalFile)
updateReadmeData(numberOfRules)
printSuccess('Success! The hosts file has been saved in folder\n' + outputPath + '\nIt contains ' +
"{:,}".format(numberOfRules) + ' unique entries.')
updateReadmeData()
printSuccess('Success! The hosts file has been saved in folder ' + settings["outputsubfolder"] + '\nIt contains ' +
"{:,}".format(settings["numberofrules"]) + ' unique entries.')
promptForMove(finalFile)
@@ -149,24 +152,24 @@ def promptForUpdate():
except:
printFailure("ERROR: No 'hosts' file in the folder, try creating one manually")
if not update:
if not settings["freshen"]:
return
response = "yes" if auto else query_yes_no("Do you want to update all data sources?")
response = "yes" if settings["auto"] else query_yes_no("Do you want to update all data sources?")
if response == "yes":
updateAllSources()
else:
if not auto:
if not settings["auto"]:
print ("OK, we\'ll stick with what we\'ve got locally.")
def promptForExclusions():
response = "no" if auto else query_yes_no("Do you want to exclude any domains?\n" +
response = "no" if settings["auto"] else query_yes_no("Do you want to exclude any domains?\n" +
"For example, hulu.com video streaming must be able to access " +
"its tracking and ad servers in order to play video.")
if response == "yes":
displayExclusionOptions()
else:
if not auto:
if not settings["auto"]:
print ("OK, we\'ll only exclude domains in the whitelist.")
def promptForMoreCustomExclusions():
@@ -178,10 +181,10 @@ def promptForMoreCustomExclusions():
def promptForMove(finalFile):
if replace:
if settings["replace"]:
response = "yes"
else:
response = "no" if auto else query_yes_no("Do you want to replace your existing hosts file " +
response = "no" if settings["auto"] else query_yes_no("Do you want to replace your existing hosts file " +
"with the newly generated file?")
if response == "yes":
moveHostsFileIntoPlace(finalFile)
@@ -191,7 +194,7 @@ def promptForMove(finalFile):
# Exclusion logic
def displayExclusionOptions():
for exclusionOption in COMMON_EXCLUSIONS:
for exclusionOption in settings["common_exclusions"]:
response = query_yes_no("Do you want to exclude the domain " + exclusionOption + " ?")
if response == "yes":
excludeDomain(exclusionOption)
@@ -211,11 +214,11 @@ def gatherCustomExclusions():
return
def excludeDomain(domain):
exclusionRegexs.append(re.compile(EXCLUSION_PATTERN + domain))
settings["exclusionregexs"].append(re.compile(settings["exclusionpattern"] + domain))
def matchesExclusions(strippedRule):
strippedDomain = strippedRule.split()[1]
for exclusionRegex in exclusionRegexs:
for exclusionRegex in settings["exclusionregexs"]:
if exclusionRegex.search(strippedDomain):
return True
return False
@@ -223,7 +226,7 @@ def matchesExclusions(strippedRule):
# Update Logic
def updateAllSources():
allsources = list(set(SOURCES) | set(EXTENSIONS))
allsources = list(set(settings["sources"]) | set(settings["extensions"]))
for source in allsources:
if os.path.isdir(source):
updateURLs = getUpdateURLsFromFile(source)
@@ -237,14 +240,14 @@ def updateAllSources():
try:
updatedFile = updatedFile.replace('\r', '') #get rid of carriage-return symbols
# This is cross-python code
dataFile = open(os.path.join(DATA_PATH, source, DATA_FILENAMES), 'wb')
dataFile = open(os.path.join(settings["datapath"], source, settings["datafilenames"]), 'wb')
writeData(dataFile, updatedFile)
dataFile.close()
except:
print ("Skipping.")
def getUpdateURLsFromFile(source):
pathToUpdateFile = os.path.join(DATA_PATH, source, UPDATE_URL_FILENAME)
pathToUpdateFile = os.path.join(settings["datapath"], source, settings["updateurlfilename"])
if os.path.exists(pathToUpdateFile):
updateFile = open(pathToUpdateFile, 'r')
retURLs = updateFile.readlines()
@@ -259,7 +262,7 @@ def getUpdateURLsFromFile(source):
def getUpdateURLFromFile(source):
pathToUpdateFile = os.path.join(DATA_PATH, source, UPDATE_URL_FILENAME)
pathToUpdateFile = os.path.join(settings["datapath"], source, settings["updateurlfilename"])
if os.path.exists(pathToUpdateFile):
updateFile = open(pathToUpdateFile, 'r')
retURL = updateFile.readline().strip()
@@ -274,34 +277,34 @@ def getUpdateURLFromFile(source):
# File Logic
def createInitialFile():
mergeFile = tempfile.NamedTemporaryFile()
for source in SOURCES:
curFile = open(os.path.join(DATA_PATH, source, DATA_FILENAMES), 'r')
for source in settings["sources"]:
curFile = open(os.path.join(settings["datapath"], source, settings["datafilenames"]), 'r')
#Done in a cross-python way
writeData(mergeFile, curFile.read())
for source in extensions:
curFile = open(os.path.join(EXTENSIONS_PATH, source, DATA_FILENAMES), 'r')
for source in settings["extensions"]:
curFile = open(os.path.join(settings["extensionspath"], source, settings["datafilenames"]), 'r')
#Done in a cross-python way
writeData(mergeFile, curFile.read())
return mergeFile
def removeDupsAndExcl(mergeFile):
global numberOfRules
if os.path.isfile(WHITELIST_FILE):
with open(WHITELIST_FILE, "r") as ins:
numberOfRules = settings["numberofrules"]
if os.path.isfile(settings["whitelistfile"]):
with open(settings["whitelistfile"], "r") as ins:
for line in ins:
if line.rstrip():
EXCLUSIONS.append(line)
settings["exclusions"].append(line)
if not os.path.exists(outputPath):
os.makedirs(outputPath)
if not os.path.exists(settings["outputpath"]):
os.makedirs(settings["outputpath"])
# Another mode is required to read and write the file in Python 3
if Python3:
finalFile = open(os.path.join(outputPath, 'hosts'), 'w+b')
finalFile = open(os.path.join(settings["outputpath"], 'hosts'), 'w+b')
else:
finalFile = open(os.path.join(outputPath, 'hosts'), 'w+')
finalFile = open(os.path.join(settings["outputpath"], 'hosts'), 'w+')
mergeFile.seek(0) # reset file pointer
hostnames = set()
@@ -309,6 +312,7 @@ def removeDupsAndExcl(mergeFile):
hostnames.add("localhost.localdomain")
hostnames.add("local")
hostnames.add("broadcasthost")
exclusions = settings["exclusions"]
for line in mergeFile.readlines():
write = 'true'
# Explicit encoding
@@ -329,7 +333,7 @@ def removeDupsAndExcl(mergeFile):
if matchesExclusions(strippedRule):
continue
hostname, normalizedRule = normalizeRule(strippedRule) # normalize rule
for exclude in EXCLUSIONS:
for exclude in exclusions:
if exclude in line:
write = 'false'
break
@@ -338,6 +342,8 @@ def removeDupsAndExcl(mergeFile):
hostnames.add(hostname)
numberOfRules += 1
settings["numberofrules"] = numberOfRules
mergeFile.close()
return finalFile
@@ -349,9 +355,9 @@ def normalizeRule(rule):
hostname = hostname.lower().strip() # explicitly lowercase and trim the hostname
if suffix is not '':
# add suffix as comment only, not as a separate host
return hostname, "%s %s #%s\n" % (targetIP, hostname, suffix)
return hostname, "%s %s #%s\n" % (settings["targetip"], hostname, suffix)
else:
return hostname, "%s %s\n" % (targetIP, hostname)
return hostname, "%s %s\n" % (settings["targetip"], hostname)
print ("==>%s<==" % rule)
return None, None
@@ -370,14 +376,13 @@ def stripRule(line):
return splitLine[0] + ' ' + splitLine[1]
def writeOpeningHeader(finalFile):
global numberOfRules
finalFile.seek(0) #reset file pointer
fileContents = finalFile.read() #save content
finalFile.seek(0) #write at the top
writeData(finalFile, '# This hosts file is a merged collection of hosts from reputable sources,\n')
writeData(finalFile, '# with a dash of crowd sourcing via Github\n#\n')
writeData(finalFile, '# Date: ' + time.strftime("%B %d %Y", time.gmtime()) + '\n')
writeData(finalFile, '# Number of unique domains: ' + "{:,}".format(numberOfRules) + '\n#\n')
writeData(finalFile, '# Number of unique domains: ' + "{:,}".format(settings["numberofrules"]) + '\n#\n')
writeData(finalFile, '# Fetch the latest version of this file: https://raw.githubusercontent.com/StevenBlack/hosts/master/hosts\n')
writeData(finalFile, '# Project home page: https://github.com/StevenBlack/hosts\n#\n')
writeData(finalFile, '# ===============================================================\n')
@@ -399,19 +404,19 @@ def writeOpeningHeader(finalFile):
finalFile.write(fileContents)
def updateReadmeData(numberOfRules):
def updateReadmeData():
extensionsKey = "base"
hostsLocation = ""
if extensions:
extensionsKey = "-".join(extensions)
if settings["extensions"]:
extensionsKey = "-".join(settings["extensions"])
generationData = {}
generationData["location"] = outputPath
generationData["entries"] = numberOfRules
generationData["location"] = os.path.join(settings["outputsubfolder"], '')
generationData["entries"] = settings["numberofrules"]
readmeData[extensionsKey] = generationData
with open(README_DATA_FILENAME, 'w') as f:
json.dump(readmeData, f)
settings["readmedata"][extensionsKey] = generationData
with open(settings["readmedatafilename"], 'w') as f:
json.dump(settings["readmedata"], f)
def moveHostsFileIntoPlace(finalFile):
if os.name == 'posix':
@@ -532,19 +537,19 @@ def printFailure(text):
# End Helper Functions
# Orphaned now.
def updateReadme(numberOfRules):
def updateReadme():
extensionsStr = "* Extensions: **none**."
extensionsHeader = ""
if extensions:
extensionsStr = "* Extensions: **" + ", ".join(extensions) + "**."
extensionsHeader = "with "+ ", ".join(extensions) + " extensions"
if settings["extensions"]:
extensionsStr = "* Extensions: **" + ", ".join(settings["extensions"]) + "**."
extensionsHeader = "with "+ ", ".join(settings["extensions"]) + " extensions"
with open(os.path.join(outputPath,README_FILENAME), "wt") as out:
for line in open(README_TEMPLATE):
with open(os.path.join(settings["outputpath"],settings["readmefilename"]), "wt") as out:
for line in open(settings["readme_template"]):
line = line.replace( '@GEN_DATE@', time.strftime("%B %d %Y", time.gmtime()))
line = line.replace( '@EXTENSIONS@', extensionsStr )
line = line.replace( '@EXTENSIONS_HEADER@', extensionsHeader )
out.write(line.replace('@NUM_ENTRIES@', "{:,}".format(numberOfRules)))
out.write(line.replace('@NUM_ENTRIES@', "{:,}".format(settings["numberofrules"])))