From dba1be4596e333ddc95116a8cc2b0aceaa56d7e0 Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Mon, 17 Nov 2025 10:53:38 -0500 Subject: [PATCH] Add CSV alternative format for committee membership --- README.md | 2 +- scripts/alternate_bulk_formats.py | 49 ++++++++++++++++++++++++++----- 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 121e6c8..3630ea6 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ File | Download | Description `legislators-historical` | [YAML](https://unitedstates.github.io/congress-legislators/legislators-historical.yaml) [JSON](https://unitedstates.github.io/congress-legislators/legislators-historical.json) [CSV](https://unitedstates.github.io/congress-legislators/legislators-historical.csv) | Historical Members of Congress (i.e. all Members of Congress except those in the current file). `legislators-social-media` | [YAML](https://unitedstates.github.io/congress-legislators/legislators-social-media.yaml) [JSON](https://unitedstates.github.io/congress-legislators/legislators-social-media.json) | Current social media accounts for Members of Congress. Official accounts only (no campaign or personal accounts). `committees-current` | [YAML](https://unitedstates.github.io/congress-legislators/committees-current.yaml) [JSON](https://unitedstates.github.io/congress-legislators/committees-current.json) | Current committees of the Congress, with subcommittees. -`committee-membership-current` | [YAML](https://unitedstates.github.io/congress-legislators/committee-membership-current.yaml) [JSON](https://unitedstates.github.io/congress-legislators/committee-membership-current.json) | Current committee/subcommittee assignments. 
+`committee-membership-current` | [YAML](https://unitedstates.github.io/congress-legislators/committee-membership-current.yaml) [JSON](https://unitedstates.github.io/congress-legislators/committee-membership-current.json) [CSV](https://unitedstates.github.io/congress-legislators/committee-membership-current.csv) | Current committee/subcommittee assignments. `committees-historical` | [YAML](https://unitedstates.github.io/congress-legislators/committees-historical.yaml) [JSON](https://unitedstates.github.io/congress-legislators/committees-historical.json) | Current and historical committees of the Congress, with subcommittees, from the 93rd Congress (1973) and on. `legislators-district-offices` | [YAML](https://unitedstates.github.io/congress-legislators/legislators-district-offices.yaml) [JSON](https://unitedstates.github.io/congress-legislators/legislators-district-offices.json) [CSV](https://unitedstates.github.io/congress-legislators/legislators-district-offices.csv) | District offices for current Members of Congress. `executive` | [YAML](https://unitedstates.github.io/congress-legislators/executive.yaml) [JSON](https://unitedstates.github.io/congress-legislators/executive.json) | Presidents and vice presidents. 
def _index_committees(committees):
    """Map every committee and subcommittee ID to its CSV column values.

    ``committees`` is the sequence loaded from committees-current.yaml.
    Top-level committees are keyed by their ``thomas_id``. Subcommittees are
    keyed by the parent's ``thomas_id`` followed by their own ``thomas_id``,
    which is the form the membership file is looked up by.

    For a subcommittee row, ``committee_name`` carries the *parent* committee
    name, ``committee_subcommittee_name`` carries the subcommittee's own name,
    and ``committee_type`` is the parent's type suffixed with " subcommittee".

    New dicts are built instead of annotating the loaded records in place, so
    the caller's data is left untouched.
    """
    committees = list(committees)  # iterated twice below

    index = {
        committee["thomas_id"]: {
            "committee_id": committee["thomas_id"],
            "committee_type": committee.get("type", ""),
            "committee_name": committee.get("name", ""),
            "committee_subcommittee_name": "",
        }
        for committee in committees
    }

    # Subcommittees are added after all top-level committees so that, as
    # before, a subcommittee entry wins if a combined ID ever collides.
    for committee in committees:
        for subcommittee in committee.get("subcommittees", []):
            subcommittee_id = committee["thomas_id"] + subcommittee["thomas_id"]
            index[subcommittee_id] = {
                "committee_id": subcommittee_id,
                "committee_type": committee["type"] + " subcommittee",
                "committee_name": committee["name"],
                "committee_subcommittee_name": subcommittee["name"],
            }
    return index


def generate_committee_membership_csv():
    """Convert committee-membership-current.yaml to a flat CSV file.

    Reads the membership file and committees-current.yaml through
    ``utils.load_data`` and writes ``../committee-membership-current.csv``
    (relative to the current working directory) with one row per
    (committee or subcommittee, member) pair. Member keys missing from a
    record are written as empty cells; a member key that is not one of the
    CSV columns makes ``csv.DictWriter`` raise ``ValueError``.

    Raises:
        KeyError: if the membership file references a committee ID that is
            not present in committees-current.yaml.
    """
    filename = "committee-membership-current.yaml"
    print("Converting %s to CSV..." % filename)
    committee_membership = utils.load_data(filename)
    fields = [
        "bioguide", "name",
        "committee_id", "committee_type", "committee_name", "committee_subcommittee_name",
        "party", "title", "rank", "chamber",
    ]

    committee_index = _index_committees(utils.load_data("committees-current.yaml"))

    # newline="" lets the csv module control line endings itself (otherwise
    # rows end in \r\r\n on Windows); the explicit encoding keeps non-ASCII
    # names from depending on the locale. The with-block guarantees the file
    # is flushed and closed even if a row fails to serialize.
    output_path = "../" + filename.replace(".yaml", ".csv")
    with open(output_path, "w", newline="", encoding="utf-8") as f:
        csv_output = csv.DictWriter(f, fieldnames=fields)
        csv_output.writeheader()

        for committee_id, members in committee_membership.items():
            try:
                committee_columns = committee_index[committee_id]
            except KeyError:
                raise KeyError(
                    "committee %r in %s is not listed in committees-current.yaml"
                    % (committee_id, filename)
                ) from None

            for member in members:
                # Copy so the loaded membership data is not modified.
                row = dict(member)
                row.update(committee_columns)
                csv_output.writerow(row)


if __name__ == '__main__':
    generate_legislator_csv()
    generate_district_office_csv()
    generate_legislator_json()
    generate_committee_membership_csv()