#!/usr/bin/python
# CNN 2004 Election Result harvester
# This script is public domain.
import httplib
import time
# Two-letter codes of the result pages to harvest (50 states plus DC),
# in the order they are fetched and written to the output file.
State_List = (
    'AL AK AZ AR CA CO CT DC DE FL '
    'GA HI ID IL IN IA KS KY LA ME '
    'MD MA MI MN MS MO MT NE NV NH '
    'NJ NM NY NC ND OH OK OR PA RI '
    'SC SD TN TX UT VT VA WA WV WI '
    'WY'
).split()
class State(object):
    """Scraped election results for a single state.

    Holds the state code, the precinct figure, the 'updated' text and a
    mapping of candidate name -> vote count.  All fields are read and
    written through the accessor methods below.
    """

    def __init__(self, state):
        """Create an empty record for *state* (no precincts, no candidates)."""
        self.state = state
        self.precincts = 0
        self.candidates = {}
        self.updated = ''

    def getState(self):
        """Return the state code this record was created with."""
        return self.state

    def getPrecincts(self):
        """Return the value last passed to setPrecincts (0 initially)."""
        return self.precincts

    def getCandidates(self):
        """Return a list of the candidate names recorded so far."""
        # list(dict) yields the keys as a real list on every Python version.
        return list(self.candidates)

    def getCandidate(self, c):
        """Return the count stored for candidate *c*.

        Raises KeyError if *c* was never added.
        """
        return self.candidates[c]

    def hasCandidate(self, c):
        """Return True if a count has been stored for candidate *c*."""
        # 'in' replaces the deprecated dict.has_key().
        return c in self.candidates

    def getUpdated(self):
        """Return the value last passed to setUpdated ('' initially)."""
        return self.updated

    def setPrecincts(self, p):
        """Store the precinct figure *p*."""
        self.precincts = p

    def addCandidate(self, c, n):
        """Store count *n* for candidate *c*, replacing any earlier value."""
        self.candidates[c] = n

    def setUpdated(self, u):
        """Store the 'updated' text *u*."""
        self.updated = u
def getPrecincts(html):
    """Return the integer that precedes the first '%' after the start marker.

    html -- the page text to search.

    The number is taken from 3 characters past the start of the marker up
    to (not including) the next '%' sign.

    Raises ValueError if the marker or a following '%' is missing, or if
    the text between them is not an integer.
    """
    # NOTE(review): in the original script this marker literal contained a
    # raw line break (which does not even parse).  It looks like an HTML tag
    # was stripped from the source -- the "+ 3" offset suggests a
    # 3-character marker.  Confirm against the real page markup.
    marker = '\n'
    marker_pos = html.find(marker)
    if marker_pos == -1:
        raise ValueError('precinct marker not found in page')
    start = marker_pos + 3
    # Search for '%' only after the marker, so an earlier '%' elsewhere in
    # the page cannot produce an empty or reversed slice.
    end = html.find('%', start)
    if end == -1:
        raise ValueError("no '%' found after precinct marker")
    return int(html[start:end])
def getResultsForState(state):
    """Download the CNN 2004 results page for one state and scrape it.

    state -- the code inserted into the results URL path
             ('/ELECTION/2004/pages/results/states/<state>/').

    Returns a State filled with the precinct figure, the 'updated' text
    and one count per candidate found on the page.

    Errors raised by httplib are not caught.  ValueError is raised when a
    number cannot be parsed, and IndexError when the page text runs out
    while looking for the next candidate.
    """
    ret = State(state)

    serv = httplib.HTTPConnection('www.cnn.com')
    try:
        # The header name is 'User-Agent'; the original 'UserAgent' key is
        # not a header servers recognise.
        headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1'}
        serv.request('GET', '/ELECTION/2004/pages/results/states/' + state + '/', '', headers)
        resp = serv.getresponse()
        html = resp.read()
    finally:
        # Release the socket even if the request or the read fails.
        serv.close()

    ret.setPrecincts(getPrecincts(html))

    # The 'updated' text is whatever follows ' Updated: ' up to the next space.
    start = html.find(' Updated: ')
    html = html[start:]
    start = html.find('ed: ') + 4
    html = html[start:]
    end = html.find(' ')
    ret.setUpdated(html[0:end])

    # NOTE(review): several find() markers below are empty strings in this
    # copy of the script.  str.find('') always returns 0, so they look like
    # HTML tags that were lost from the source -- confirm against the real
    # page markup before trusting the parsed candidate names.
    while True:
        # Advance to the next candidate row.
        start = html.find('width="18" height="18"> | ')
        html = html[start:]
        start = html.find('') + 3
        html = html[start:]

        if html[0] == '<':
            # The name is wrapped in a tag; skip past the end of its opening part.
            start = html.find(')">') + 3
            html = html[start:]
        end = html.find('')
        candidate = html[0:end]

        # The count sits after the next '4">' and runs up to ' | '.
        start = html.find(' | ')
        html = html[start:]
        start = html.find('4">') + 3
        html = html[start:]
        end = html.find(' | ')
        count = int(html[0:end].replace(',', ''))
        ret.addCandidate(candidate, count)

        # Stop once 'scorecard' is close ahead or absent (find() gives -1).
        scorecard = html.find('scorecard')
        if scorecard < 200:  # 200 is an educated guess
            break
    return ret
def saveData(path, states):
    """Write a tab-separated results table for *states* to *path*.

    path   -- file to create or overwrite.
    states -- sequence of objects offering getState, getUpdated,
              getPrecincts, getCandidates, getCandidate and hasCandidate.

    The file has a header row, one row per state (0 for a candidate the
    state does not list), a 'Totals' row with per-candidate sums, and a
    final 'Grand Total' line with the sum over all candidates and states.
    """
    totals = {}
    grand_total = 0
    for s in states:
        for c in s.getCandidates():
            count = s.getCandidate(c)
            grand_total += count
            totals[c] = totals.get(c, 0) + count

    # Fix the column order once so header, state rows and totals line up.
    columns = list(totals)

    f = open(path, 'w')
    try:
        f.write("State\tUpdated\tPrecincts")
        for c in columns:
            f.write("\t%s" % (c))
        f.write("\n")

        for s in states:
            f.write(s.getState())
            f.write("\t%s" % (s.getUpdated()))
            f.write("\t%s" % (s.getPrecincts()))
            for c in columns:
                if s.hasCandidate(c):
                    f.write("\t%i" % (s.getCandidate(c)))
                else:
                    f.write("\t0")
            f.write("\n")

        # Two empty cells keep the sums under the candidate columns.
        f.write("Totals\t\t")
        for c in columns:
            f.write("\t%i" % (totals[c]))
        f.write("\n\nGrand Total\t%i" % (grand_total))
    finally:
        # Close the file even if one of the writes raises.
        f.close()
if __name__ == '__main__':
    # Harvest every state in State_List in order, then write the combined
    # table to election.txt in the current directory.
    # For a quick trial run, temporarily rebind State_List to one code,
    # e.g. ['DC'].
    states = []
    total = len(State_List)
    for position, code in enumerate(State_List):
        # Progress is 1-based so the last state reads total/total.
        # A single parenthesized argument prints the same on Python 2 and 3.
        print("Getting %s (%i/%i)" % (code, position + 1, total))
        states.append(getResultsForState(code))
    saveData("election.txt", states)