#!/usr/bin/python
#
#Author: Gleeda
#
#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#as published by the Free Software Foundation; either version
#2 of the License, or (at your option) any later version.
#
# getmalwaredomains.py
#
# collects domain/IP/Date/Reverse Lookup information
# from malwaredomainlist.com
#
# NOTE(review): the copy of this script that was reviewed had lost its
# line structure and every HTML tag literal (e.g. find('') where a tag
# name used to be), together with the end of Domains.process_row and the
# beginning of Domains.extract.  Those spots are reconstructed below and
# individually marked; verify them against the upstream script.

import os
import sys
import getopt
import re
import sqlite3

try:
    import httplib  # Python 2
except ImportError:
    import http.client as httplib  # Python 3

try:
    from urllib import quote  # Python 2
except ImportError:
    from urllib.parse import quote  # Python 3

DBNAME = "malwaredomains.db"
MAXWAIT = (60*10)  # ten minutes

_MDL_HOST = 'www.malwaredomainlist.com'
# Column header text whose presence is used as the "this is a listing page" check.
_TABLE_MARKER = 'Date (UTC)'
# Matches any HTML tag; used to reduce a table cell to its text.
_TAG_RE = re.compile(r'<[^>]*>')
# Keys a parsed row must carry before it can be stored.
_REQUIRED_KEYS = ('domains', 'ips', 'rlookups', 'dates')


def usage():
    """Print the command-line help text."""
    print('getmalwaredomains.py:')
    print(' - collects domain/IP/Date/Reverse Lookup information from malwaredomainlist.com')
    print('\t-h, --help : print help message')
    print('\t-d, --database : sqlite3 db to store domain info (default malwaredomains.db)')
    print('\t-b, --bulk : bulk upload up to given number of pages')
    print('\t-u, --update : process entries from update link')
    print('\t-a, --add : add a particular domain\n')


class malwaredomains:
    """Fetches listing pages from malwaredomainlist.com."""

    def _fetch(self, path, url):
        """GET *path* from the site and return the page text, or None.

        The page is only accepted when it contains the listing header text.
        *url* is used purely for the progress message.
        """
        conn = None
        try:
            conn = httplib.HTTPConnection(_MDL_HOST)
            conn.request('GET', path)
            response = conn.getresponse().read()
            if not isinstance(response, str):
                # Python 3 hands back bytes; the parser works on text.
                response = response.decode('utf-8', 'replace')
            if response.find(_TABLE_MARKER) != -1:
                print("Page exists: %s" % url)
                return response
            print("Unable to connect!")
        except Exception as e:
            # Deliberately best-effort: any failure is reported and turned
            # into a None result, which the callers already handle.
            print("Error connecting: %s" % e)
        finally:
            if conn is not None:
                conn.close()
        return None

    def adddomain(self, d):
        """Search the site for domain *d*; return the result page or None."""
        # The search term is user input, so escape it before it goes into the URL.
        path = '/mdl.php?search=' + quote(d, safe='') + '&colsearch=All&quantity=50'
        return self._fetch(path, 'http://' + _MDL_HOST + path)

    def getupdates(self):
        """Return the site's update page, or None on failure."""
        return self._fetch('/update.php', 'http://' + _MDL_HOST + '/update.php')

    def getdomains(self, page):
        """Return listing page number *page* (newest first, 100 per page), or None."""
        path = ('/mdl.php?inactive=&sort=Date&search=&colsearch=All'
                '&ascordesc=DESC&quantity=100&page=' + str(page))
        return self._fetch(path, 'http://' + _MDL_HOST + '/mdl.php')


class Domains:
    """Parses the HTML listing table of a fetched page into row dicts."""

    def __init__(self, data):
        self.data = data
        # Position of each column in a listing row.
        self.column_names = {0 : 'Date', 1 : 'Domain', 2 : 'IP', 3 : 'Rlookup',
                             4 : 'Description', 5 : 'Registrant', 6 : 'ASN',
                             7 : 'COUNTRY'}

    def process_entries(self, str):
        """Return the text of a table cell with markup and outer whitespace removed.

        The parameter keeps its original name (it shadows the builtin) so the
        signature is unchanged.
        """
        # NOTE(review): the original removed a fixed set of tags whose names
        # were lost from the source, and its strip() calls discarded their
        # results.  Removing every tag is a superset of the former; stripping
        # whitespace is what the latter evidently intended.
        return _TAG_RE.sub('', str).strip()

    def process_column(self, column, ncol):
        """Parse one cell.

        *column* is row text starting at the cell's opening tag and *ncol* is
        the zero-based column index.  Returns a one-entry dict keyed 'dates',
        'domains', 'ips' or 'rlookups', or None for any other column, an
        image cell, or a malformed cell.
        """
        start_value = column.find('>')
        if start_value == -1:
            return None
        column = column[start_value + 1:]
        # NOTE(review): closing-tag literal reconstructed (lost in the source).
        end_column = column.find('</td>')
        if end_column == -1:
            return None
        cell = column[:end_column]
        if cell.find('img src') != -1:
            return None

        # .get(): a row with more cells than known columns must not raise.
        name = self.column_names.get(ncol)
        text = self.process_entries(cell)
        if name == 'Date':
            # Test the cleaned text: a closing tag would always supply a '/'.
            if '/' in text and ':' in text:
                return {'dates': text.replace("_", " ")}
            return None
        if name == 'Domain':
            return {'domains': text.lstrip('.')}
        if name == 'Rlookup':
            return {'rlookups': text.rstrip('.')}
        if name == 'IP':
            return {'ips': text}
        return None

    def process_row(self, row):
        """Parse one table row into a dict of the recognised columns.

        Returns None when the row has no closing tag.
        """
        # NOTE(review): tag literals and the cell loop are reconstructed; the
        # source only preserved the set-up of this method.
        end_row = row.find('</tr>')
        if end_row == -1:
            return None
        row = row[:end_row]
        row_info = {}
        for ncol, cell in enumerate(row.split('<td')[1:]):
            info = self.process_column('<td' + cell, ncol)
            if info:
                row_info.update(info)
        return row_info

    def extract(self):
        """Return a list of row dicts parsed from the page's listing table.

        Returns an empty list when the page has no recognisable table.
        """
        data = self.data
        if not data:
            return []
        # NOTE(review): how the original located the table was lost from the
        # source.  Here the table is the one enclosing the listing header text.
        header = data.find(_TABLE_MARKER)
        if header == -1:
            return []
        start_table = data.rfind('<table', 0, header)
        end_table = data.find('</table>', header)
        if start_table == -1 or end_table == -1:
            return []
        table_data = data[start_table:end_table]

        domain_info = []
        for nrow, row in enumerate(table_data.split('<tr')[1:]):
            if nrow == 0:
                # First row of the table is skipped, as in the original.
                continue
            row_info = self.process_row(row)
            if row_info:
                domain_info.append(row_info)
        return domain_info


def init():
    """Create the sqlite database named by DBNAME unless the file already exists."""
    if os.path.isfile(DBNAME):
        return
    conn = sqlite3.connect(DBNAME)
    try:
        curs = conn.cursor()
        curs.executescript('''
            CREATE TABLE domains (
                id INTEGER PRIMARY KEY,
                domain TEXT,
                ip TEXT,
                rlookup TEXT,
                date TEXT
            );
        ''')
        curs.close()
        conn.commit()
    finally:
        conn.close()
    if os.path.isfile(DBNAME):
        print("Success.")
    else:
        print("Failed.")


def process_domains(domain_info):
    """Insert parsed rows into the database, skipping rows already present.

    *domain_info* is the list returned by Domains.extract().  Rows that do
    not carry all four stored fields are ignored instead of raising.
    """
    conn = sqlite3.connect(DBNAME)
    conn.text_factory = str
    try:
        cur = conn.cursor()
        for info in domain_info or []:
            if not info or any(key not in info for key in _REQUIRED_KEYS):
                continue
            values = (info['domains'], info['ips'], info['rlookups'], info['dates'])
            cur.execute("SELECT COUNT(*) FROM domains where domain=? AND ip=? AND rlookup=? AND date=?", values)
            count = cur.fetchone()[0]
            if count < 1:
                cur.execute("INSERT INTO domains VALUES(null, ?, ?, ?, ?)", values)
                print('[domain] %s \n\t[IP]  %s' % (info['domains'], info['ips']))
            else:
                print("NOT ADDED " + info['domains'] + '\n\twith IP: ' + info['ips'] + ' already in database, not added')
        conn.commit()
    finally:
        conn.close()


def _store_page(data):
    """Parse and store one fetched page; return False when there is no page."""
    if data is None:
        print("Cannot find url!")
        return False
    process_domains(Domains(data).extract())
    return True


def main():
    """Parse the command line and run the add, update and/or bulk modes."""
    global DBNAME
    try:
        # "update" was missing from the long options, so --update was rejected.
        opts, args = getopt.getopt(sys.argv[1:], "hb:a:d:u",
                                   ["help", "bulk=", "add=", "database=", "update"])
    except getopt.GetoptError as err:
        print(str(err))
        sys.exit(2)

    bulk = False
    update = False
    domain = None
    pages = 0
    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit(2)
        elif o in ("-b", "--bulk"):
            try:
                pages = int(a)
            except ValueError:
                print('Number of pages must be an integer: %s\n' % a)
                usage()
                sys.exit(2)
            bulk = True
        elif o in ("-a", "--add"):
            domain = a
        elif o in ("-d", "--database"):
            DBNAME = a
        elif o in ("-u", "--update"):
            update = True
        else:
            # getopt only yields the options declared above.
            print("unhandled option\n\n")
            sys.exit(2)

    if not bulk and domain is None and not update:
        print('You must choose either bulk, update or single addition mode!\n')
        usage()
        sys.exit(-1)

    init()
    md = malwaredomains()

    if domain is not None:
        _store_page(md.adddomain(domain))

    if update:
        _store_page(md.getupdates())

    if bulk:
        # Pages 0..pages inclusive, as before; stop at the first failed fetch.
        for i in range(pages + 1):
            if not _store_page(md.getdomains(str(i))):
                return


if __name__ == "__main__":
    main()