Add new validation tests for the social media file (#823)

* Add some basic checks.
* If a TWITTER_API_BEARER_TOKEN environment variable is set, query the Twitter API to check that Twitter usernames and IDs match, and if usernames are in canonical case.
* Fix Sen. Coons's twitter_id to match the Twitter handle. The existing ID corresponds to the Twitter handle SenCoonsOffice, which also appears to be a correct account, but his website links to the ChrisCoons account.
* Update various Twitter handles to TitleCase if the account itself uses TitleCase
This commit is contained in:
Joshua Tauberer
2022-03-21 23:37:36 -04:00
committed by GitHub
parent 91bbca7f7e
commit 27ab666b2d
2 changed files with 76 additions and 29 deletions

View File

@@ -52,7 +52,7 @@
thomas: '02291'
govtrack: 412668
social:
twitter: senthomtillis
twitter: SenThomTillis
facebook: SenatorThomTillis
youtube_id: UCUD9VGV4SSGWjGdbn37Ea2w
twitter_id: 2964174789
@@ -1545,7 +1545,7 @@
thomas: '01708'
govtrack: 400162
social:
twitter: RepraulGrijalva
twitter: RepRaulGrijalva
facebook: Rep.Grijalva
youtube: raulgrijalvaaz07
youtube_id: UC8JZLnXS21bhNbcvpoM8O7g
@@ -1748,12 +1748,12 @@
govtrack: 412390
social:
twitter: ChrisCoons
twitter_id: 15324851
facebook: senatorchriscoons
youtube: senatorchriscoons
youtube_id: UC2lOVbsddn1HIkcDnmCw6tA
instagram: senatorchriscoons
instagram_id: 25194715
twitter_id: 435500714
- id:
bioguide: C001087
thomas: '01989'
@@ -2950,7 +2950,7 @@
thomas: '01837'
social:
facebook: chrismurphyct
twitter: senmurphyoffice
twitter: SenMurphyOffice
youtube: senchrismurphy
youtube_id: UCbcEa40PIFpLpdDe06n3F3Q
twitter_id: 2853793517
@@ -3192,7 +3192,7 @@
govtrack: 412625
thomas: '02239'
social:
twitter: reprickallen
twitter: RepRickAllen
instagram: Rep_RickAllen
instagram_id: 1666787116
facebook: CongressmanRickAllen
@@ -3221,7 +3221,7 @@
govtrack: 412623
thomas: '02237'
social:
twitter: congressmanhice
twitter: CongressmanHice
facebook: CongressmanJodyHice
twitter_id: 2975091705
youtube_id: UCzUMsS8DusN2QLpgweFthgQ
@@ -3231,7 +3231,7 @@
govtrack: 412615
thomas: '02229'
social:
twitter: reppeteaguilar
twitter: RepPeteAguilar
facebook: reppeteaguilar
twitter_id: 3018670151
youtube_id: UCxwbFLOlKDsXrwizV5jah7g
@@ -3249,7 +3249,7 @@
govtrack: 412659
thomas: '02274'
social:
twitter: staceyplaskett
twitter: StaceyPlaskett
facebook: repstaceyplaskett
twitter_id: 2724095695
youtube_id: UC3V7biFZHDFHDFZy6cCSZUA
@@ -3347,7 +3347,7 @@
govtrack: 412673
social:
facebook: reptrentkelly
twitter: reptrentkelly
twitter: RepTrentKelly
twitter_id: 3317799825
instagram: reptrentkelly
youtube_id: UCtDrz-8tdg4ZgOAQHToSWaQ
@@ -3390,7 +3390,7 @@
govtrack: 412708
social:
facebook: repraskin
twitter: repraskin
twitter: RepRaskin
twitter_id: 806906355214852096
instagram: repraskin
youtube_id: UChxDE6vho5BfBTOqiBsDmKg
@@ -3408,7 +3408,7 @@
govtrack: 412697
social:
facebook: RepCharlieCrist
twitter: repcharliecrist
twitter: RepCharlieCrist
twitter_id: 816030424778543104
instagram: repcharliecrist
- id:
@@ -3416,7 +3416,7 @@
govtrack: 412691
social:
facebook: drnealdunnfl2
twitter: drnealdunnfl2
twitter: DrNealDunnFL2
twitter_id: 815952318487298048
youtube_id: UCRTGfrYP-RuDzJ2KGok7TRA
- id:
@@ -3441,7 +3441,7 @@
govtrack: 412698
social:
facebook: repbrianmast
twitter: repbrianmast
twitter: RepBrianMast
twitter_id: 814103950404239360
youtube: repbrianmast
instagram: RepBrianMast
@@ -3495,7 +3495,7 @@
govtrack: 412681
social:
facebook: SenatorCortezMasto
twitter: sencortezmasto
twitter: SenCortezMasto
twitter_id: 811313565760163844
youtube_id: UCip_83SiKUqwnUT57VOXrCg
- id:
@@ -3512,7 +3512,7 @@
govtrack: 412723
social:
facebook: RepJennifferGonzalezColon
twitter: repjenniffer
twitter: RepJenniffer
twitter_id: 819744763020775425
youtube_id: UCZj99h3-GNKjGGeyp7AJeXw
- id:
@@ -3529,7 +3529,7 @@
govtrack: 412703
social:
facebook: reptrey
twitter: reptrey
twitter: RepTrey
twitter_id: 811986281177772032
youtube_id: UCTgS8a_zLg8HpUnk5WXY5ag
- id:
@@ -3571,7 +3571,7 @@
thomas: '02124'
social:
facebook: CongressmanBradSchneider
twitter: repschneider
twitter: RepSchneider
twitter_id: 1071840474
instagram: repschneider
youtube: RepBradSchneider
@@ -3596,7 +3596,7 @@
govtrack: 412721
social:
facebook: repbrianfitzpatrick
twitter: repbrianfitz
twitter: RepBrianFitz
twitter_id: 816303263586914304
instagram: repbrianfitz
youtube_id: UCYMNcgHss4_Q5JY2vSvMpgw
@@ -3605,7 +3605,7 @@
govtrack: 412682
social:
facebook: repohalleran
twitter: repohalleran
twitter: RepOHalleran
twitter_id: 808416682972770304
instagram: repohalleran
- id:
@@ -3628,7 +3628,7 @@
govtrack: 412701
social:
facebook: congressmanraja
twitter: congressmanraja
twitter: CongressmanRaja
twitter_id: 814179031956488192
instagram: congressmanraja
- id:
@@ -3636,7 +3636,7 @@
govtrack: 412680
social:
facebook: SenatorHassan
twitter: Senatorhassan
twitter: SenatorHassan
twitter_id: 946549322
- id:
bioguide: E000297
@@ -3652,7 +3652,7 @@
govtrack: 412724
social:
facebook: RepDavidKustoff
twitter: repdavidkustoff
twitter: RepDavidKustoff
twitter_id: 816012124505931780
instagram: repdavidkustoff
youtube_id: UCK6U4plOKKMrCjv_h59spGQ
@@ -3740,7 +3740,7 @@
bioguide: C001110
govtrack: 412688
social:
twitter: reploucorrea
twitter: RepLouCorrea
facebook: RepLouCorrea
twitter_id: 815985039485837312
youtube_id: UCIKJJp4QJIDjnjodc23qKkw
@@ -3749,7 +3749,7 @@
bioguide: B001298
govtrack: 412713
social:
twitter: repdonbacon
twitter: RepDonBacon
twitter_id: 818975124460335106
facebook: RepDonBacon
instagram: RepDonBacon
@@ -3991,7 +3991,7 @@
- id:
bioguide: G000592
social:
twitter: repgolden
twitter: RepGolden
twitter_id: 1080891667308298240
- id:
bioguide: H001085
@@ -4081,7 +4081,7 @@
- id:
bioguide: T000483
social:
twitter: repdavidtrone
twitter: RepDavidTrone
twitter_id: 1080573351914061825
- id:
bioguide: U000040
@@ -4161,7 +4161,7 @@
- id:
bioguide: M001208
social:
twitter: replucymcbath
twitter: RepLucyMcBath
twitter_id: 1082380458976051202
- id:
bioguide: P000615
@@ -4252,7 +4252,7 @@
bioguide: G000061
govtrack: 456792
social:
twitter: repmikegarcia
twitter: RepMikeGarcia
facebook: RepMikeGarcia
twitter_id: 1262531473057423361
- id:
@@ -4700,7 +4700,7 @@
bioguide: J000020
govtrack: 456793
social:
twitter: repjacobs
twitter: RepJacobs
twitter_id: 1276232539510919168
- id:
bioguide: L000595

View File

@@ -452,6 +452,52 @@ def check_committee_assignments():
if c not in membership:
print("committees-current.yaml", "No membership information for: " + c)
def check_social_media():
    """Validate legislators-social-media.yaml.

    Reports, via error(), every entry whose bioguide ID is not present in
    legislators-current.yaml and every entry that has only one of the
    'twitter' / 'twitter_id' fields.

    If the TWITTER_API_BEARER_TOKEN environment variable is set, the Twitter
    API is additionally queried (through tweepy) to check that each
    username/ID pair refers to the same account and that the username is
    written in the account's own letter case.
    """
    # Check the social media file.
    with open("legislators-social-media.yaml") as f:
        social_media = rtyaml.load(f)

    # Get currently serving legislators, keyed by bioguide ID.
    with open("legislators-current.yaml") as f:
        legislators_current = rtyaml.load(f)
    legislators_current = {p["id"]["bioguide"]: p for p in legislators_current}

    for entry in social_media:
        # Check that the entry is for a currently serving legislator.
        p = legislators_current.get(entry["id"]["bioguide"])
        if not p:
            error("legislators-social-media.yaml", "Entry for non-current legislator: " + entry["id"]["bioguide"])
            continue

        # Check that if the 'twitter' field is given that 'twitter_id' is also given,
        # and vice versa.
        if ("twitter" in entry["social"]) != ("twitter_id" in entry["social"]):
            error("legislators-social-media.yaml", "Entry has 'twitter' but not 'twitter_id' or vice versa: " + entry["id"]["bioguide"])

    # The remaining checks need network access and credentials; skip them
    # unless a bearer token has been provided.
    if "TWITTER_API_BEARER_TOKEN" not in os.environ:
        return

    # Imported lazily so tweepy is only required when the API checks run.
    import tweepy
    twitter = tweepy.Client(os.environ["TWITTER_API_BEARER_TOKEN"])

    # Check that twitter user names matches twitter IDs.
    twitter_matches = [(entry["social"]["twitter"], entry["social"]["twitter_id"])
                       for entry in social_media
                       if "twitter" in entry["social"] and "twitter_id" in entry["social"]]

    # Query in batches of at most 100 accounts per request.
    batch_size = 100
    for start in range(0, len(twitter_matches), batch_size):
        tm = twitter_matches[start:start + batch_size]

        # Response.data is None (not an empty list) when no requested user
        # was found, so fall back to an empty list to let the mismatch be
        # reported below instead of crashing with a TypeError.
        response = twitter.get_users(usernames=",".join(username for username, _ in tm))
        users_by_username = {user.username.lower(): user.id for user in (response.data or [])}

        response = twitter.get_users(ids=",".join(str(uid) for _, uid in tm))
        users_by_id = {user.id: user.username for user in (response.data or [])}

        for username, uid in tm:
            if uid not in users_by_id or users_by_id[uid].lower() != username.lower():
                error("legislators-social-media.yaml", "Mismatch between Twitter username {} (ID={}) and ID {} (username={}).".format(
                    username, users_by_username.get(username.lower(), "<invalid username>"), uid, users_by_id.get(uid, "<invalid ID>")))
            elif users_by_id[uid] != username and users_by_id[uid] != users_by_id[uid].lower():
                # Don't push to use the canonical case if the canonical case is all lowercase.
                error("legislators-social-media.yaml", "Non-canonical case for Twitter username {} (should be {}).".format(username, users_by_id[uid]))
if __name__ == "__main__":
# Check the legislators files.
seen_ids = { }
@@ -463,6 +509,7 @@ if __name__ == "__main__":
check_id_uniqueness(seen_ids)
check_district_offices()
check_committee_assignments()
check_social_media()
# Exit with exit status.
sys.exit(0 if ok else 1)