Permalink
Browse files

Major overhaul. Gender has bugs.

  • Loading branch information...
1 parent f52fb50 commit 2f689a021f63838b78f325ff75dcbde9610d31fd @wilson428 committed Sep 25, 2012
Showing with 69 additions and 0 deletions.
  1. +69 −0 gender.py
View
@@ -0,0 +1,69 @@
+import json, sqlite3
+
def dict_factory(cursor, row):
    """Row factory that returns a result row as a dict keyed by column name.

    The first item of every entry in ``cursor.description`` is used as the
    key, and the value at the same position in ``row`` becomes its value.
    """
    column_names = [column[0] for column in cursor.description]
    return {key: row[position] for position, key in enumerate(column_names)}
+
+
# Directory holding the working files.  The trailing slash matters because
# the database file name is appended to it by plain string concatenation.
path = "/Users/cewilson/Desktop/source/FEC/"

# Module-wide connection and cursor shared by the functions that query the
# stats table.  The row factory is installed before the cursor is created so
# that rows fetched through ``c`` come back as dicts.
conn = sqlite3.connect(path + 'names.sqlite')
conn.row_factory = dict_factory
c = conn.cursor()
+
def hyphens():
    """Assign a gender to hyphenated names whose first two parts agree.

    Reads every ``stats`` row whose gender is 'Not found', splits the name on
    '-', and looks up the first two parts with ``get_gender``.  When both
    parts resolve to the same value and that value is "male" or "female", the
    row's gender is updated to it.  All updates are committed once at the end.
    """
    rows = c.execute("SELECT name FROM stats WHERE gender = 'Not found'").fetchall()
    for row in rows:
        parts = row['name'].split('-')
        if len(parts) < 2:
            # Not a hyphenated name; nothing to infer.
            continue
        first = get_gender(parts[0])
        second = get_gender(parts[1])
        if first == second and first in ("male", "female"):
            # Bound parameters keep names containing quote characters from
            # breaking (or altering) the statement.
            c.execute(
                "UPDATE stats SET gender = ? WHERE name = ?",
                (first, row['name'])
            )
    conn.commit()
+
+
def input_genders(mn=10):
    """Interactively classify names whose gender is still 'Not found'.

    Prompts, in name order, for every ``stats`` row with gender 'Not found'
    whose ``obama`` or ``romney`` column is at least ``mn``.

    Accepted answers (case-insensitive):
        m / male    -> "male"
        f / female  -> "female"
        b           -> "both"
        u           -> "unknown"
        x           -> "" (empty string)
        exit        -> stop prompting
    Any other answer leaves the row untouched and moves on to the next name.

    Args:
        mn: Minimum value of ``obama`` or ``romney`` for a name to be asked
            about.  It is truncated to an integer before use.

    Answers given before "exit" are committed along with the rest.
    """
    # raw_input only exists on Python 2; fall back to input on Python 3.
    try:
        ask = raw_input
    except NameError:
        ask = input

    answers = {
        'm': "male",
        'male': "male",
        'f': "female",
        'female': "female",
        'b': "both",
        'u': "unknown",
        'x': "",
    }

    threshold = int(mn)
    rows = c.execute(
        "SELECT name FROM stats WHERE gender = 'Not found' "
        "AND (obama >= ? OR romney >= ?) ORDER BY name",
        (threshold, threshold)
    ).fetchall()

    for row in rows:
        reply = ask("guess gender for %s: " % row['name']).lower()
        if reply == "exit":
            # Leave the loop instead of returning so the commit below still runs.
            break
        if reply in answers:
            # Bound parameters keep names containing quote characters from
            # breaking (or altering) the statement.
            c.execute(
                "UPDATE stats SET gender = ? WHERE name = ?",
                (answers[reply], row['name'])
            )
    conn.commit()
+
# Lazily filled cache of gender.json, keyed by upper-cased name.
_GENDER_TABLE = None


def get_gender(name, table=None):
    """Look up the gender recorded for ``name``.

    The lookup is case-insensitive: ``name`` is upper-cased before it is
    searched for.  When no ``table`` is supplied, the mapping is read once
    from ``gender.json`` (the file ``save_genders`` maintains), its keys are
    upper-cased, and the result is cached for later calls.

    Args:
        name: The name to look up.
        table: Optional mapping of UPPER-CASED name -> gender to search
            instead of the contents of ``gender.json``.

    Returns:
        The stored gender string, or "Not found" when the name is absent.

    Raises:
        IOError/OSError: if ``gender.json`` is needed but cannot be opened.
    """
    global _GENDER_TABLE
    if table is None:
        if _GENDER_TABLE is None:
            with open("gender.json", "r") as handle:
                stored = json.load(handle)
            _GENDER_TABLE = dict((key.upper(), value) for key, value in stored.items())
        table = _GENDER_TABLE
    return table.get(name.upper(), "Not found")
+
def save_genders():
    """Merge classified names from the database back into ``gender.json``.

    Loads the existing name -> gender mapping from ``gender.json``,
    title-cases its keys, then overlays every ``stats`` row whose gender is
    'male', 'female' or 'both' (also keyed by title-cased name, so database
    values win on a clash).  The merged mapping is written back to
    ``gender.json`` with sorted keys and an indent of 3.
    """
    with open("gender.json", "r") as source:
        known = json.load(source)
    merged = dict((name.title(), value) for name, value in known.items())

    rows = c.execute(
        "SELECT name, gender FROM stats "
        "WHERE gender = 'male' OR gender = 'female' OR gender = 'both'"
    ).fetchall()
    for row in rows:
        merged[row['name'].title()] = row['gender']

    # Serialise before opening the file for writing: opening with "w"
    # truncates it, so a failure in dumps must not leave it empty.
    payload = json.dumps(merged, indent=3, sort_keys=True)
    with open("gender.json", "w") as target:
        target.write(payload)
+
# Disabled by default; uncomment to run the export to gender.json.
#save_genders()

# Close the module-level SQLite connection at the end of the script.
conn.close()

0 comments on commit 2f689a0

Please sign in to comment.