#!/usr/bin/python
# CNN 2004 Election Result harvester
# This script is public domain.
"""Harvest per-state 2004 election results from www.cnn.com.

For every code in ``State_List`` the script downloads the state's result
page, scrapes the precinct percentage, the "Updated" text and the vote
count of each candidate, and finally writes one tab-separated summary
file (see ``saveData``).
"""

try:
    import httplib  # Python 2 module name
except ImportError:
    import http.client as httplib  # same module under its Python 3 name
import time

State_List = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL',
    'GA', 'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME',
    'MD', 'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH',
    'NJ', 'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI',
    'SC', 'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI',
    'WY',
]


class State(object):
    """Scraped results for one state.

    Holds the state code, the precinct figure, the "Updated" text and a
    mapping of candidate name to vote count.
    """

    def __init__(self, state):
        self.state = state
        self.precincts = 0
        self.candidates = {}
        self.updated = ''

    def getState(self):
        """Return the state code this object was created with."""
        return self.state

    def getPrecincts(self):
        """Return the stored precinct figure (0 until set)."""
        return self.precincts

    def getCandidates(self):
        """Return the candidate names recorded so far, as a list."""
        # list() keeps the Python 2 behaviour (a real list, not a view).
        return list(self.candidates)

    def getCandidate(self, c):
        """Return the vote count of candidate *c*; KeyError if unknown."""
        return self.candidates[c]

    def hasCandidate(self, c):
        """Return True if a count has been recorded for candidate *c*."""
        return c in self.candidates

    def getUpdated(self):
        """Return the stored "Updated" text ('' until set)."""
        return self.updated

    def setPrecincts(self, p):
        self.precincts = p

    def addCandidate(self, c, n):
        """Record (or overwrite) the vote count *n* for candidate *c*."""
        self.candidates[c] = n

    def setUpdated(self, u):
        self.updated = u


def getPrecincts(html):
    """Return the integer that precedes the first '%' in *html*.

    The number is taken from three characters after the opening marker up
    to the first '%'.  Raises ValueError if that slice is not an integer.
    """
    # NOTE(review): the opening marker is an empty string in this copy of
    # the script, so find() always returns 0 and the slice starts at
    # index 3.  The +3 offset suggests a three-character tag was lost;
    # restore it before relying on this for real pages.
    start = html.find('') + 3
    end = html.find('%')
    return int(html[start:end])


def _fetchStatePage(state):
    """Download the result page for *state* and return it as text."""
    serv = httplib.HTTPConnection('www.cnn.com')
    # The header name is "User-Agent"; a key spelled "UserAgent" is sent as
    # an unrelated header.  The value's parenthesis is also closed now.
    headers = {
        'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    }
    try:
        serv.request(
            'GET',
            '/ELECTION/2004/pages/results/states/' + state + '/',
            headers=headers,
        )
        body = serv.getresponse().read()
    finally:
        # Always release the socket, even when the request fails.
        serv.close()
    if not isinstance(body, str):
        # Python 3 returns bytes.  latin-1 maps every byte to a character,
        # so decoding cannot fail; the page's declared charset is not
        # inspected here.
        body = body.decode('latin-1')
    return body


def _parseStatePage(state, html):
    """Build a State for *state* from the text of its result page."""
    # NOTE(review): several find() markers below are empty strings, and two
    # contain a raw line break, in this copy of the script.  They look like
    # HTML tags that were lost; an empty marker always matches at index 0,
    # so the candidate loop cannot work on a real page until the original
    # markers are restored.  They are reproduced here exactly as found.
    ret = State(state)
    ret.setPrecincts(getPrecincts(html))

    # Skip to the "Updated: " label and keep the text up to the next marker.
    html = html[html.find('\nUpdated: '):]
    html = html[html.find('ed: ') + 4:]
    ret.setUpdated(html[:html.find('\n')])

    while True:
        # Each candidate entry is located via this image-size attribute.
        html = html[html.find('width="18" height="18">'):]
        html = html[html.find('') + 3:]

        # When the name is wrapped in a tag, skip past the end of its
        # opening tag (which ends in ')">') first.
        if html.startswith('<'):
            html = html[html.find(')">') + 3:]
        candidate = html[:html.find('')]

        # The vote count follows the next '4">' marker.
        html = html[html.find(''):]
        html = html[html.find('4">') + 3:]
        count = html[:html.find('')]
        ret.addCandidate(candidate, int(count.replace(',', '')))

        # Stop once the text 'scorecard' is closer than 200 characters
        # (or absent: find() then returns -1).  200 is an educated guess.
        if html.find('scorecard') < 200:
            break
    return ret


def getResultsForState(state):
    """Fetch and parse the CNN result page for *state*.

    Returns a State holding the precinct figure, the "Updated" text and
    the vote count of every candidate found.  Network errors propagate
    from httplib; a page that does not have the expected layout makes the
    integer conversions raise ValueError.
    """
    return _parseStatePage(state, _fetchStatePage(state))


def saveData(path, states):
    """Write a tab-separated summary of *states* to *path*.

    The file has a header row, one row per state (0 for candidates that
    state does not list), a "Totals" row and a final "Grand Total" line.
    Candidate columns are sorted by name so repeated runs produce the
    same layout.
    """
    totals = {}
    grand_total = 0
    for s in states:
        for c in s.getCandidates():
            count = s.getCandidate(c)
            grand_total += count
            totals[c] = totals.get(c, 0) + count
    columns = sorted(totals)

    # "with" closes the file even if one of the writes raises.
    with open(path, 'w') as f:
        f.write("State\tUpdated\tPrecincts")
        for c in columns:
            f.write("\t%s" % (c,))
        f.write("\n")

        for s in states:
            f.write(s.getState())
            f.write("\t%s" % (s.getUpdated(),))
            f.write("\t%s" % (s.getPrecincts(),))
            for c in columns:
                if s.hasCandidate(c):
                    f.write("\t%i" % (s.getCandidate(c),))
                else:
                    f.write("\t0")
            f.write("\n")

        # Two empty cells keep the totals under the candidate columns.
        f.write("Totals\t\t")
        for c in columns:
            f.write("\t%i" % (totals[c],))
        f.write("\n\nGrand Total\t%i" % (grand_total,))


def main():
    """Harvest every state in State_List and save to election.txt."""
    # For a quick trial run, iterate over a short list such as ['DC'].
    states = []
    for index, code in enumerate(State_List, 1):
        # Progress is 1-based so the last state reads "(51/51)".
        print("Getting %s (%i/%i)" % (code, index, len(State_List)))
        states.append(getResultsForState(code))
    saveData("election.txt", states)


if __name__ == '__main__':
    main()