mirror of
https://github.com/unitedstates/congress-legislators.git
synced 2026-04-12 09:00:03 -04:00
117 lines
4.9 KiB
Python
117 lines
4.9 KiB
Python
# Runs various validation tests on current legislators.
|
|
|
|
import rtyaml
|
|
|
|
# Number of congressional districts per state for the 113th and later
# Congresses. A value of 1 means a single at-large district; the marker 'T'
# flags a territory that is represented by a delegate instead.
apportionment = {
    'AL': 7, 'AK': 1, 'AS': 'T', 'AZ': 9, 'AR': 4, 'CA': 53, 'CO': 7,
    'CT': 5, 'DE': 1, 'DC': 'T', 'FL': 27, 'GA': 14, 'GU': 'T', 'HI': 2,
    'ID': 2, 'IL': 18, 'IN': 9, 'IA': 4, 'KS': 4, 'KY': 6, 'LA': 6,
    'ME': 2, 'MD': 8, 'MA': 9, 'MI': 14, 'MN': 8, 'MS': 4, 'MO': 8,
    'MT': 1, 'NE': 3, 'NV': 4, 'NH': 2, 'NJ': 12, 'NM': 3, 'NY': 27,
    'NC': 13, 'ND': 1, 'MP': 'T', 'OH': 16, 'OK': 5, 'OR': 5, 'PA': 18,
    'PR': 'T', 'RI': 2, 'SC': 7, 'SD': 1, 'TN': 9, 'TX': 36, 'UT': 4,
    'VT': 1, 'VI': 'T', 'VA': 11, 'WA': 10, 'WV': 3, 'WI': 8, 'WY': 1,
}
|
|
|
|
def run():
    """Validate the current legislators data and print every problem found.

    Reads ``../legislators-current.yaml`` and ``../legislators-historical.yaml``
    (relative to the working directory) with ``rtyaml`` and checks, for the
    most recent term of each current legislator: the THOMAS ID, gender and
    birthday, the term start/end dates against hard-coded lists, the state
    and district (or Senate class) against ``apportionment``, and the
    party/caucus combination. Afterwards it reports offices held by more
    than one person, IDs used more than once across both files, and
    impossible junior/senior senator combinations within a state.

    Problems are reported with ``print``; nothing is returned. Optional
    term fields that are missing are reported as invalid rather than
    aborting the run with a ``KeyError``.
    """
    # Context managers make sure both file handles are closed after loading.
    with open("../legislators-current.yaml") as f:
        P = rtyaml.load(f)
    with open("../legislators-historical.yaml") as f:
        P_historical = rtyaml.load(f)

    offices = {}
    senate_ranks = {}

    for p in P:
        # Used only to label messages; .get keeps a record that lacks a
        # bioguide ID from aborting the whole validation run.
        bioguide = p['id'].get('bioguide')

        # IDs.
        thomas = p['id'].get('thomas')
        if not thomas:
            print("No THOMAS ID for %s." % bioguide)
        elif not isinstance(thomas, str) or thomas[0] != '0':
            print("Invalid THOMAS ID for %s: %s." % (bioguide, str(thomas)))

        # Biographical data.
        bio = p.get("bio", {})
        gender = bio.get("gender")
        if gender not in ("M", "F"):
            print("Gender of %s is not valid: %s." % (bioguide, str(gender)))
        birthday = bio.get("birthday", "")
        if len(birthday) != 10:
            print("Birthday of %s is not valid: %s." % (bioguide, birthday))

        # Get the current term.
        term = p['terms'][-1]

        # Start/end dates.
        if term['start'] not in ('2011-01-05', '2013-01-03', '2015-01-06'):
            print("Term start date of %s is not right: %s." % (bioguide, term['start']))
        if term['end'] not in ('2017-01-03', '2019-01-03', '2021-01-03'):
            print("Term end date of %s is not right: %s." % (bioguide, term['end']))

        # State and district. A missing 'district' or 'class' comes back
        # as None from .get and is reported by the checks below.
        district = term.get('district')
        senate_class = term.get('class')
        if term['state'] not in apportionment:
            print("Term state in %s is invalid: %s." % (bioguide, term['state']))
        elif term['type'] == 'rep':
            ap = apportionment[term['state']]
            if not isinstance(district, int) or district < 0:
                print("Term district in %s is invalid: %s." % (bioguide, str(district)))
            elif ap in ("T", 1) and district != 0:
                print("Term district in %s is invalid for an at-large state: %s." % (bioguide, str(district)))
            elif ap not in ("T", 1) and district == 0:
                print("Term district in %s is invalid for a not-at-large state: %s." % (bioguide, str(district)))
            elif ap not in ("T", 1) and district > ap:
                print("Term district in %s is invalid: %s." % (bioguide, str(district)))
        elif term['type'] == 'sen':
            if senate_class not in (1, 2, 3):
                print("Term class in %s is invalid: %s." % (bioguide, str(senate_class)))

        # Make sure there are no duplicate offices -- checked at the end.
        office = (term['type'], term['state'], district if term['type'] == 'rep' else senate_class)
        offices.setdefault(office, []).append(p)

        # Senate state rank.
        # Collect all of the senate state ranks so we can check that the
        # distribution within each state is correct, at the end.
        if term['type'] == 'sen':
            senate_ranks.setdefault(term['state'], []).append((bioguide, term.get('state_rank')))

        # Party.
        party = term['party']
        caucus = term.get("caucus")
        if party not in ("Republican", "Democrat", "Independent"):
            print("Suspicious party for %s: %s." % (bioguide, party))
        elif party != "Independent" and caucus is not None:
            print("caucus field should not be used if the party is not Independent, in %s: %s." % (bioguide, caucus))
        elif party == "Independent" and caucus is None:
            # caucus is None here (usually because the key is absent), so it
            # must not be read back with term['caucus'].
            print("caucus field should be used if the party is Independent, in %s: %s." % (bioguide, caucus))

    # Check for duplicate offices.
    for k, v in offices.items():
        if len(v) > 1:
            print("Multiple holders of the office", k)
            print(rtyaml.dump(v))

    # Check for duplicate use of any of the IDs.
    ids = set()
    for p in P + P_historical:
        # Collect IDs for uniqueness test.
        for k, v1 in p['id'].items():
            # The 'fec' ID is a list, convert the others to a list.
            if not isinstance(v1, list):
                v1 = [v1]
            for v in v1:
                key = (k, v)
                if key in ids:
                    print("Duplicate ID: %s %s" % (k, v))
                    continue
                ids.add(key)

    for state, ranks in senate_ranks.items():
        # There can be a junior and senior senator, a senior senator, or no senators.
        # There can't be two juniors, two seniors, or just a junior senator.
        # key=str keeps the sort from failing when a rank is missing (None);
        # such a list never matches an allowed combination and is reported.
        r = sorted((rr[1] for rr in ranks), key=str)
        if r not in [['junior', 'senior'], ['senior'], []]:
            print("State ranks for %s cannot be right: %s." % (state, ranks))
|
|
|
|
# Script entry point: perform the validation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run()
|