mirror of
https://github.com/unitedstates/congress-legislators.git
synced 2025-12-19 18:05:51 -05:00
Merge pull request #863 from unitedstates/birthdays
Update some historical birthdays by reading the Bioguide bulk JSON download with a new script
This commit is contained in:
@@ -7382,6 +7382,7 @@
|
||||
last: Van Ness
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1769-11-04'
|
||||
terms:
|
||||
- type: rep
|
||||
start: '1801-12-07'
|
||||
@@ -11930,6 +11931,7 @@
|
||||
last: Van Alen
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1772-12-31'
|
||||
terms:
|
||||
- type: rep
|
||||
start: '1807-10-26'
|
||||
@@ -32138,6 +32140,7 @@
|
||||
last: Ware
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1776-02-16'
|
||||
terms:
|
||||
- type: sen
|
||||
start: '1821-12-03'
|
||||
@@ -32320,6 +32323,7 @@
|
||||
last: Williams
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1773-01-14'
|
||||
terms:
|
||||
- type: sen
|
||||
start: '1817-12-01'
|
||||
@@ -44526,6 +44530,7 @@
|
||||
last: Black
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1800-08-11'
|
||||
terms:
|
||||
- type: sen
|
||||
start: '1832-11-12'
|
||||
@@ -48519,7 +48524,7 @@
|
||||
middle: John
|
||||
last: Grayson
|
||||
bio:
|
||||
birthday: '1788-11-02'
|
||||
birthday: '1788-11-12'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -51831,7 +51836,7 @@
|
||||
middle: Dustin
|
||||
last: Coffin
|
||||
bio:
|
||||
birthday: '1805-09-09'
|
||||
birthday: '1804-09-10'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -54548,6 +54553,7 @@
|
||||
last: Word
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1805-02-06'
|
||||
terms:
|
||||
- type: rep
|
||||
start: '1837-09-04'
|
||||
@@ -68219,7 +68225,7 @@
|
||||
middle: Clinton
|
||||
last: Hastings
|
||||
bio:
|
||||
birthday: '1813-11-22'
|
||||
birthday: '1814-11-22'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -68608,6 +68614,7 @@
|
||||
last: Lawrence
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1800-08-19'
|
||||
terms:
|
||||
- type: rep
|
||||
start: '1845-12-01'
|
||||
@@ -95044,7 +95051,7 @@
|
||||
first: Emerson
|
||||
last: Etheridge
|
||||
bio:
|
||||
birthday: '1819-09-28'
|
||||
birthday: '1819-09-18'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -99283,7 +99290,7 @@
|
||||
middle: Jordan
|
||||
last: Crittenden
|
||||
bio:
|
||||
birthday: '1787-09-10'
|
||||
birthday: '1786-09-10'
|
||||
gender: M
|
||||
terms:
|
||||
- type: sen
|
||||
@@ -152901,7 +152908,7 @@
|
||||
middle: Jefferson
|
||||
last: De Haven
|
||||
bio:
|
||||
birthday: '1849-03-12'
|
||||
birthday: '1845-03-12'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -156691,7 +156698,7 @@
|
||||
middle: McClure
|
||||
last: Wiley
|
||||
bio:
|
||||
birthday: '1846-08-11'
|
||||
birthday: '1841-08-11'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -193661,7 +193668,7 @@
|
||||
middle: Davis
|
||||
last: James
|
||||
bio:
|
||||
birthday: '1850-02-27'
|
||||
birthday: '1849-02-27'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -243370,7 +243377,7 @@
|
||||
middle: Wickliffe
|
||||
last: Roark
|
||||
bio:
|
||||
birthday: '1887-01-22'
|
||||
birthday: '1877-01-22'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -269892,7 +269899,7 @@
|
||||
middle: Marvin
|
||||
last: Jones
|
||||
bio:
|
||||
birthday: '1886-02-26'
|
||||
birthday: '1882-02-26'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -293090,7 +293097,7 @@
|
||||
middle: Henry
|
||||
last: Miller
|
||||
bio:
|
||||
birthday: '1879-12-15'
|
||||
birthday: '1876-12-15'
|
||||
gender: M
|
||||
terms:
|
||||
- type: sen
|
||||
@@ -320497,7 +320504,7 @@
|
||||
middle: Victor
|
||||
last: Libonati
|
||||
bio:
|
||||
birthday: '1900-12-29'
|
||||
birthday: '1897-12-29'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -323473,7 +323480,7 @@
|
||||
middle: J.
|
||||
last: Brown
|
||||
bio:
|
||||
birthday: '1893-07-14'
|
||||
birthday: '1895-07-14'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -393490,7 +393497,7 @@
|
||||
middle: Thomas
|
||||
last: Redmond
|
||||
bio:
|
||||
birthday: '1955-01-28'
|
||||
birthday: '1954-01-28'
|
||||
gender: M
|
||||
terms:
|
||||
- type: rep
|
||||
@@ -439242,7 +439249,7 @@
|
||||
official_full: Joe Donnelly
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1955-09-28'
|
||||
birthday: '1955-09-29'
|
||||
terms:
|
||||
- type: rep
|
||||
start: '2007-01-04'
|
||||
@@ -450810,7 +450817,7 @@
|
||||
official_full: Denver Riggleman
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1970-03-01'
|
||||
birthday: '1970-03-17'
|
||||
terms:
|
||||
- type: rep
|
||||
start: '2019-01-03'
|
||||
@@ -452156,7 +452163,7 @@
|
||||
official_full: Antonio Delgado
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1977-01-19'
|
||||
birthday: '1977-01-28'
|
||||
terms:
|
||||
- type: rep
|
||||
start: '2019-01-03'
|
||||
@@ -458340,7 +458347,7 @@
|
||||
official_full: Andy Levin
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1960-08-01'
|
||||
birthday: '1960-08-10'
|
||||
terms:
|
||||
- type: rep
|
||||
start: '2019-01-03'
|
||||
@@ -458420,7 +458427,7 @@
|
||||
official_full: Anthony Gonzalez
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1984-09-19'
|
||||
birthday: '1984-09-18'
|
||||
terms:
|
||||
- type: rep
|
||||
start: '2019-01-03'
|
||||
@@ -458861,7 +458868,7 @@
|
||||
official_full: Joseph Sempolinski
|
||||
bio:
|
||||
gender: M
|
||||
birthday: '1982-02-10'
|
||||
birthday: '1983-02-10'
|
||||
terms:
|
||||
- type: rep
|
||||
start: '2022-09-13'
|
||||
|
||||
87
scripts/bioguide_xml.py
Normal file
87
scripts/bioguide_xml.py
Normal file
@@ -0,0 +1,87 @@
|
||||
# Update metadata fields like birthdays from
|
||||
# bioguide.congress.gov bulk data downloads.
|
||||
#
|
||||
# Usage:
|
||||
# python3 bioguide_xml.py path/to/BioguideProfiles.zip
|
||||
|
||||
import sys
|
||||
import zipfile
|
||||
import re
|
||||
import json
|
||||
import rtyaml
|
||||
import datetime
|
||||
|
||||
# Read both legislator YAML files.
#   legislator_data: file type ("current"/"historical") -> the full list of
#                    records loaded from that file.
#   legislators:     Bioguide ID -> the record for that person (the same
#                    object that sits in legislator_data, so edits made
#                    through this index show up in legislator_data too).
legislator_data = {}
legislators = {}
for ft in ("current", "historical"):
    yaml_path = "../legislators-{}.yaml".format(ft)
    with open(yaml_path) as f:
        legislator_data[ft] = rtyaml.load(f)
    # Index every record of this file by its Bioguide ID.
    legislators.update(
        (person["id"]["bioguide"], person) for person in legislator_data[ft]
    )
|
||||
|
||||
# Profile texts in which the birth date is separated from "born" by a
# semicolon (or where "born" is missing altogether). Each entry is an
# (exact substring, replacement) pair that rewrites the text into a
# "born <date>" form the date pattern below can match. Applied in order.
_BIRTHDAY_TEXT_FIXUPS = (
    ("born in Cresskill, Bergen County, N. J.; April", "born April"),
    ("FOSTER, A. Lawrence, a Representative from New York; September 17, 1802;", "born September 17, 1802"),
    ("CAO, Anh (Joseph), a Representative from Louisiana; born in Ho Chi Minh City, Vietnam; March 13, 1967", "born March 13, 1967"),
    ("CRITZ, Mark S., a Representative from Pennsylvania; born in Irwin, Westmoreland County, Pa.; January 5, 1962;", "born January 5, 1962"),
    ("SCHIFF, Steven Harvey, a Representative from New Mexico; born in Chicago, Ill.; March 18, 1947", "born March 18, 1947"),
    ('KRATOVIL, Frank, M. Jr., a Representative from Maryland; born in Lanham, Prince George\u2019s County, Md.; May 29, 1968', "born May 29, 1968"),
)

_MONTH_NAMES = "January|February|March|April|May|June|July|August|September|October|November|December"

# "born", then anything up to (but not across) a semicolon, then a full
# "<Month> <day>, <year>" date. Commas after the month and day are optional.
_BIRTHDAY_PATTERN = re.compile(
    r"born [^;]*?((?:" + _MONTH_NAMES + r"),? \d{1,2},? \d{4})",
    re.I,
)


def parse_birthday_from_text(text):
    """Extract a full birth date from a Bioguide profile text.

    Parameters:
        text: the profile's biography text (a str).

    Returns a 2-tuple (birthday, original_text):
        ("YYYY-MM-DD", "<date as written>")  a full date was found and parsed;
        (None, "<date as written>")          something date-like was found but
                                             is not a real calendar date
                                             (a message is printed);
        (None, None)                         no full date follows "born"
                                             (e.g. unknown birth date, or only
                                             a year / month and year given).
    """
    # exceptions for not-nicely-placed semicolons
    for needle, replacement in _BIRTHDAY_TEXT_FIXUPS:
        text = text.replace(needle, replacement)

    # look for a date
    match = _BIRTHDAY_PATTERN.search(text)
    if not match:
        # Texts saying the birth date is unknown, or giving only a year or
        # month+year, end up here too; none of them can yield a full date,
        # so they are all reported the same way.
        return None, None
    original_text = match.group(1).strip()

    try:
        birthday = datetime.datetime.strptime(original_text.replace(",", ""), "%B %d %Y")
    except ValueError:
        # Matched the pattern but is not a valid date (e.g. "February 30").
        # Only names available inside this function are used in the message.
        print("BAD BIRTHDAY %r :(\n\n%s" % (original_text, text))
        return None, original_text

    birthday = "%04d-%02d-%02d" % (birthday.year, birthday.month, birthday.day)
    return birthday, original_text
|
||||
|
||||
# Process all profile data in the bioguide ZIP file.
# Each entry is expected to be named "<BioguideID>.json" and to contain one
# JSON profile; profiles for people not in the legislator files are ignored.
with zipfile.ZipFile(sys.argv[1]) as zf:
    for profile_fn in zf.namelist():
        # Skip anything that is not a "<BioguideID>.json" entry (directory
        # entries, stray files) instead of crashing on a failed match.
        name_match = re.match(r"^([A-Z]\d+)\.json", profile_fn)
        if not name_match:
            print("Skipping unexpected ZIP entry", profile_fn, file=sys.stderr)
            continue
        bioguide_id = name_match.group(1)
        if bioguide_id not in legislators:
            #print("No legislator for", bioguide_id)
            continue
        with zf.open(profile_fn) as zff:
            profile = json.load(zff)
        if "profileText" not in profile:
            continue

        legislator = legislators[bioguide_id]

        # Get birthday from text.
        birthday, original_text = parse_birthday_from_text(profile["profileText"])
        if birthday:

            # Check birthday from metadata --- not as reliable.
            # Since the metadata may only have a year, only match
            # as much of the date string as it has.
            if profile.get("birthDate") and not profile.get("birthCirca"):
                if profile["birthDate"] != birthday[0:len(profile["birthDate"])]:
                    print(bioguide_id, "metadata", repr(profile["birthDate"]), "doesn't match profile text", repr(original_text))
                else:
                    # They match, so update. Make sure this person's record
                    # (not the ID index) has a "bio" mapping to write into.
                    legislator.setdefault("bio", {})
                    legislator["bio"]["birthday"] = birthday
|
||||
|
||||
|
||||
# Write out updated data files.
# legislator_data maps each file type ("current", "historical") to its list
# of records; every list goes back to the file named after its own key.
for fn in legislator_data:
    with open("../legislators-{}.yaml".format(fn), "w") as f:
        rtyaml.dump(legislator_data[fn], f)
|
||||
|
||||
Reference in New Issue
Block a user