mirror of
https://github.com/unitedstates/congress-legislators.git
synced 2025-12-19 18:05:51 -05:00
Add new validation tests for the social media file (#823)
* Add some basic checks. * If a TWITTER_API_BEARER_TOKEN environment variable is set, query the Twitter API to check that Twitter usernames and IDs match and that usernames are in canonical case. * Fix Sen. Coons's twitter_id to match the Twitter handle. The existing ID corresponds to the Twitter handle SenCoonsOffice, which also appears to be a correct account, but his website links to the ChrisCoons account. * Update various Twitter handles to TitleCase where the account itself uses TitleCase.
This commit is contained in:
@@ -52,7 +52,7 @@
|
||||
thomas: '02291'
|
||||
govtrack: 412668
|
||||
social:
|
||||
twitter: senthomtillis
|
||||
twitter: SenThomTillis
|
||||
facebook: SenatorThomTillis
|
||||
youtube_id: UCUD9VGV4SSGWjGdbn37Ea2w
|
||||
twitter_id: 2964174789
|
||||
@@ -1545,7 +1545,7 @@
|
||||
thomas: '01708'
|
||||
govtrack: 400162
|
||||
social:
|
||||
twitter: RepraulGrijalva
|
||||
twitter: RepRaulGrijalva
|
||||
facebook: Rep.Grijalva
|
||||
youtube: raulgrijalvaaz07
|
||||
youtube_id: UC8JZLnXS21bhNbcvpoM8O7g
|
||||
@@ -1748,12 +1748,12 @@
|
||||
govtrack: 412390
|
||||
social:
|
||||
twitter: ChrisCoons
|
||||
twitter_id: 15324851
|
||||
facebook: senatorchriscoons
|
||||
youtube: senatorchriscoons
|
||||
youtube_id: UC2lOVbsddn1HIkcDnmCw6tA
|
||||
instagram: senatorchriscoons
|
||||
instagram_id: 25194715
|
||||
twitter_id: 435500714
|
||||
- id:
|
||||
bioguide: C001087
|
||||
thomas: '01989'
|
||||
@@ -2950,7 +2950,7 @@
|
||||
thomas: '01837'
|
||||
social:
|
||||
facebook: chrismurphyct
|
||||
twitter: senmurphyoffice
|
||||
twitter: SenMurphyOffice
|
||||
youtube: senchrismurphy
|
||||
youtube_id: UCbcEa40PIFpLpdDe06n3F3Q
|
||||
twitter_id: 2853793517
|
||||
@@ -3192,7 +3192,7 @@
|
||||
govtrack: 412625
|
||||
thomas: '02239'
|
||||
social:
|
||||
twitter: reprickallen
|
||||
twitter: RepRickAllen
|
||||
instagram: Rep_RickAllen
|
||||
instagram_id: 1666787116
|
||||
facebook: CongressmanRickAllen
|
||||
@@ -3221,7 +3221,7 @@
|
||||
govtrack: 412623
|
||||
thomas: '02237'
|
||||
social:
|
||||
twitter: congressmanhice
|
||||
twitter: CongressmanHice
|
||||
facebook: CongressmanJodyHice
|
||||
twitter_id: 2975091705
|
||||
youtube_id: UCzUMsS8DusN2QLpgweFthgQ
|
||||
@@ -3231,7 +3231,7 @@
|
||||
govtrack: 412615
|
||||
thomas: '02229'
|
||||
social:
|
||||
twitter: reppeteaguilar
|
||||
twitter: RepPeteAguilar
|
||||
facebook: reppeteaguilar
|
||||
twitter_id: 3018670151
|
||||
youtube_id: UCxwbFLOlKDsXrwizV5jah7g
|
||||
@@ -3249,7 +3249,7 @@
|
||||
govtrack: 412659
|
||||
thomas: '02274'
|
||||
social:
|
||||
twitter: staceyplaskett
|
||||
twitter: StaceyPlaskett
|
||||
facebook: repstaceyplaskett
|
||||
twitter_id: 2724095695
|
||||
youtube_id: UC3V7biFZHDFHDFZy6cCSZUA
|
||||
@@ -3347,7 +3347,7 @@
|
||||
govtrack: 412673
|
||||
social:
|
||||
facebook: reptrentkelly
|
||||
twitter: reptrentkelly
|
||||
twitter: RepTrentKelly
|
||||
twitter_id: 3317799825
|
||||
instagram: reptrentkelly
|
||||
youtube_id: UCtDrz-8tdg4ZgOAQHToSWaQ
|
||||
@@ -3390,7 +3390,7 @@
|
||||
govtrack: 412708
|
||||
social:
|
||||
facebook: repraskin
|
||||
twitter: repraskin
|
||||
twitter: RepRaskin
|
||||
twitter_id: 806906355214852096
|
||||
instagram: repraskin
|
||||
youtube_id: UChxDE6vho5BfBTOqiBsDmKg
|
||||
@@ -3408,7 +3408,7 @@
|
||||
govtrack: 412697
|
||||
social:
|
||||
facebook: RepCharlieCrist
|
||||
twitter: repcharliecrist
|
||||
twitter: RepCharlieCrist
|
||||
twitter_id: 816030424778543104
|
||||
instagram: repcharliecrist
|
||||
- id:
|
||||
@@ -3416,7 +3416,7 @@
|
||||
govtrack: 412691
|
||||
social:
|
||||
facebook: drnealdunnfl2
|
||||
twitter: drnealdunnfl2
|
||||
twitter: DrNealDunnFL2
|
||||
twitter_id: 815952318487298048
|
||||
youtube_id: UCRTGfrYP-RuDzJ2KGok7TRA
|
||||
- id:
|
||||
@@ -3441,7 +3441,7 @@
|
||||
govtrack: 412698
|
||||
social:
|
||||
facebook: repbrianmast
|
||||
twitter: repbrianmast
|
||||
twitter: RepBrianMast
|
||||
twitter_id: 814103950404239360
|
||||
youtube: repbrianmast
|
||||
instagram: RepBrianMast
|
||||
@@ -3495,7 +3495,7 @@
|
||||
govtrack: 412681
|
||||
social:
|
||||
facebook: SenatorCortezMasto
|
||||
twitter: sencortezmasto
|
||||
twitter: SenCortezMasto
|
||||
twitter_id: 811313565760163844
|
||||
youtube_id: UCip_83SiKUqwnUT57VOXrCg
|
||||
- id:
|
||||
@@ -3512,7 +3512,7 @@
|
||||
govtrack: 412723
|
||||
social:
|
||||
facebook: RepJennifferGonzalezColon
|
||||
twitter: repjenniffer
|
||||
twitter: RepJenniffer
|
||||
twitter_id: 819744763020775425
|
||||
youtube_id: UCZj99h3-GNKjGGeyp7AJeXw
|
||||
- id:
|
||||
@@ -3529,7 +3529,7 @@
|
||||
govtrack: 412703
|
||||
social:
|
||||
facebook: reptrey
|
||||
twitter: reptrey
|
||||
twitter: RepTrey
|
||||
twitter_id: 811986281177772032
|
||||
youtube_id: UCTgS8a_zLg8HpUnk5WXY5ag
|
||||
- id:
|
||||
@@ -3571,7 +3571,7 @@
|
||||
thomas: '02124'
|
||||
social:
|
||||
facebook: CongressmanBradSchneider
|
||||
twitter: repschneider
|
||||
twitter: RepSchneider
|
||||
twitter_id: 1071840474
|
||||
instagram: repschneider
|
||||
youtube: RepBradSchneider
|
||||
@@ -3596,7 +3596,7 @@
|
||||
govtrack: 412721
|
||||
social:
|
||||
facebook: repbrianfitzpatrick
|
||||
twitter: repbrianfitz
|
||||
twitter: RepBrianFitz
|
||||
twitter_id: 816303263586914304
|
||||
instagram: repbrianfitz
|
||||
youtube_id: UCYMNcgHss4_Q5JY2vSvMpgw
|
||||
@@ -3605,7 +3605,7 @@
|
||||
govtrack: 412682
|
||||
social:
|
||||
facebook: repohalleran
|
||||
twitter: repohalleran
|
||||
twitter: RepOHalleran
|
||||
twitter_id: 808416682972770304
|
||||
instagram: repohalleran
|
||||
- id:
|
||||
@@ -3628,7 +3628,7 @@
|
||||
govtrack: 412701
|
||||
social:
|
||||
facebook: congressmanraja
|
||||
twitter: congressmanraja
|
||||
twitter: CongressmanRaja
|
||||
twitter_id: 814179031956488192
|
||||
instagram: congressmanraja
|
||||
- id:
|
||||
@@ -3636,7 +3636,7 @@
|
||||
govtrack: 412680
|
||||
social:
|
||||
facebook: SenatorHassan
|
||||
twitter: Senatorhassan
|
||||
twitter: SenatorHassan
|
||||
twitter_id: 946549322
|
||||
- id:
|
||||
bioguide: E000297
|
||||
@@ -3652,7 +3652,7 @@
|
||||
govtrack: 412724
|
||||
social:
|
||||
facebook: RepDavidKustoff
|
||||
twitter: repdavidkustoff
|
||||
twitter: RepDavidKustoff
|
||||
twitter_id: 816012124505931780
|
||||
instagram: repdavidkustoff
|
||||
youtube_id: UCK6U4plOKKMrCjv_h59spGQ
|
||||
@@ -3740,7 +3740,7 @@
|
||||
bioguide: C001110
|
||||
govtrack: 412688
|
||||
social:
|
||||
twitter: reploucorrea
|
||||
twitter: RepLouCorrea
|
||||
facebook: RepLouCorrea
|
||||
twitter_id: 815985039485837312
|
||||
youtube_id: UCIKJJp4QJIDjnjodc23qKkw
|
||||
@@ -3749,7 +3749,7 @@
|
||||
bioguide: B001298
|
||||
govtrack: 412713
|
||||
social:
|
||||
twitter: repdonbacon
|
||||
twitter: RepDonBacon
|
||||
twitter_id: 818975124460335106
|
||||
facebook: RepDonBacon
|
||||
instagram: RepDonBacon
|
||||
@@ -3991,7 +3991,7 @@
|
||||
- id:
|
||||
bioguide: G000592
|
||||
social:
|
||||
twitter: repgolden
|
||||
twitter: RepGolden
|
||||
twitter_id: 1080891667308298240
|
||||
- id:
|
||||
bioguide: H001085
|
||||
@@ -4081,7 +4081,7 @@
|
||||
- id:
|
||||
bioguide: T000483
|
||||
social:
|
||||
twitter: repdavidtrone
|
||||
twitter: RepDavidTrone
|
||||
twitter_id: 1080573351914061825
|
||||
- id:
|
||||
bioguide: U000040
|
||||
@@ -4161,7 +4161,7 @@
|
||||
- id:
|
||||
bioguide: M001208
|
||||
social:
|
||||
twitter: replucymcbath
|
||||
twitter: RepLucyMcBath
|
||||
twitter_id: 1082380458976051202
|
||||
- id:
|
||||
bioguide: P000615
|
||||
@@ -4252,7 +4252,7 @@
|
||||
bioguide: G000061
|
||||
govtrack: 456792
|
||||
social:
|
||||
twitter: repmikegarcia
|
||||
twitter: RepMikeGarcia
|
||||
facebook: RepMikeGarcia
|
||||
twitter_id: 1262531473057423361
|
||||
- id:
|
||||
@@ -4700,7 +4700,7 @@
|
||||
bioguide: J000020
|
||||
govtrack: 456793
|
||||
social:
|
||||
twitter: repjacobs
|
||||
twitter: RepJacobs
|
||||
twitter_id: 1276232539510919168
|
||||
- id:
|
||||
bioguide: L000595
|
||||
|
||||
@@ -452,6 +452,52 @@ def check_committee_assignments():
|
||||
if c not in membership:
|
||||
print("committees-current.yaml", "No membership information for: " + c)
|
||||
|
||||
def check_social_media():
    """Validate the entries of legislators-social-media.yaml.

    Problems are reported through the file's ``error(filename, message)``
    helper; nothing is returned. The checks are:

    * every entry's bioguide ID belongs to a legislator listed in
      legislators-current.yaml;
    * an entry has both 'twitter' and 'twitter_id', or neither;
    * only when the TWITTER_API_BEARER_TOKEN environment variable is set:
      each Twitter username/ID pair refers to the same account according
      to the Twitter API, and the stored username uses the account's
      canonical letter case (unless that canonical form is all lowercase).
    """
    # Check the social media file.
    with open("legislators-social-media.yaml") as f:
        social_media = rtyaml.load(f)

    # Get currently serving legislators, indexed by bioguide ID.
    with open("legislators-current.yaml") as f:
        legislators_current = rtyaml.load(f)
    legislators_current = {p["id"]["bioguide"]: p for p in legislators_current}

    for entry in social_media:
        # Check that the entry is for a currently serving legislator.
        p = legislators_current.get(entry["id"]["bioguide"])
        if not p:
            error("legislators-social-media.yaml", "Entry for non-current legislator: " + entry["id"]["bioguide"])
            continue

        # Check that if the 'twitter' field is given that 'twitter_id' is also given,
        # and vice versa.
        if ("twitter" in entry["social"]) != ("twitter_id" in entry["social"]):
            error("legislators-social-media.yaml", "Entry has 'twitter' but not 'twitter_id' or vice versa: " + entry["id"]["bioguide"])

    if "TWITTER_API_BEARER_TOKEN" in os.environ:
        # Imported lazily so tweepy is only required when the API check runs.
        import tweepy
        twitter = tweepy.Client(os.environ["TWITTER_API_BEARER_TOKEN"])

        # Check that Twitter usernames match Twitter IDs.
        twitter_matches = [(entry["social"]["twitter"], entry["social"]["twitter_id"])
                           for entry in social_media
                           if "twitter" in entry["social"] and "twitter_id" in entry["social"]]

        # Query in batches of 100 (the users-lookup endpoint's per-request
        # limit; see the tweepy Client.get_users documentation).
        while twitter_matches:
            tm = twitter_matches[0:100]
            twitter_matches = twitter_matches[100:]

            # Response.data is None (not an empty list) when no requested
            # user was found, so fall back to [] to let the mismatch be
            # reported below instead of crashing with a TypeError.
            users_by_username = twitter.get_users(usernames=",".join(tmm[0] for tmm in tm))
            users_by_username = {user.username.lower(): user.id for user in users_by_username.data or []}
            users_by_id = twitter.get_users(ids=",".join(str(tmm[1]) for tmm in tm))
            users_by_id = {user.id: user.username for user in users_by_id.data or []}

            for username, uid in tm:
                if uid not in users_by_id or users_by_id[uid].lower() != username.lower():
                    error("legislators-social-media.yaml", "Mismatch between Twitter username {} (ID={}) and ID {} (username={}).".format(
                        username, users_by_username.get(username.lower(), "<invalid username>"), uid, users_by_id.get(uid, "<invalid ID>")))
                elif users_by_id[uid] != username and users_by_id[uid] != users_by_id[uid].lower():
                    # Don't push to use the canonical case if the canonical case is all lowercase.
                    error("legislators-social-media.yaml", "Non-canonical case for Twitter username {} (should be {}).".format(username, users_by_id[uid]))
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Check the legislators files.
|
||||
seen_ids = { }
|
||||
@@ -463,6 +509,7 @@ if __name__ == "__main__":
|
||||
check_id_uniqueness(seen_ids)
|
||||
check_district_offices()
|
||||
check_committee_assignments()
|
||||
check_social_media()
|
||||
|
||||
# Exit with exit status.
|
||||
sys.exit(0 if ok else 1)
|
||||
|
||||
Reference in New Issue
Block a user