fix download_media - TweetWithVisibilityResults and Tweet schemas differ

This commit is contained in:
Trevor Hobenshield
2024-04-21 15:27:01 -07:00
parent 26ade1bb14
commit c150f1a349
2 changed files with 8 additions and 10 deletions

View File

@@ -1,5 +1,5 @@
__title__ = "twitter-api-client" __title__ = "twitter-api-client"
__description__ = "Implementation of X/Twitter v1, v2, and GraphQL APIs." __description__ = "Implementation of X/Twitter v1, v2, and GraphQL APIs."
__version__ = "0.10.21" __version__ = "0.10.22"
__author__ = "Trevor Hobenshield" __author__ = "Trevor Hobenshield"
__license__ = "MIT" __license__ = "MIT"

View File

@@ -291,30 +291,28 @@ class Scraper:
media = {} media = {}
for data in tweets: for data in tweets:
for tweet in data.get('data', {}).get('tweetResult', []): for tweet in data.get('data', {}).get('tweetResult', []):
if _id := tweet.get('result', {}).get('rest_id'): # TweetWithVisibilityResults and Tweet have different structures
root = tweet.get('result', {}).get('tweet', {}) or tweet.get('result', {})
date = tweet.get('result', {}).get('legacy', {}).get('created_at', '') if _id := root.get('rest_id'):
uid = tweet.get('result', {}).get('legacy', {}).get('user_id_str', '') date = root.get('legacy', {}).get('created_at', '')
uid = root.get('legacy', {}).get('user_id_str', '')
media[_id] = {'date': date, 'uid': uid, 'img': set(), 'video': {'thumb': set(), 'video_info': {}, 'hq': set()}, 'card': []} media[_id] = {'date': date, 'uid': uid, 'img': set(), 'video': {'thumb': set(), 'video_info': {}, 'hq': set()}, 'card': []}
for _media in (y for x in find_key(root, 'media') for y in x if isinstance(x, list)):
for _media in (y for x in find_key(tweet['result'], 'media') for y in x if isinstance(x, list)):
if videos: if videos:
if vinfo := _media.get('video_info'): if vinfo := _media.get('video_info'):
hq = sorted(vinfo.get('variants', []), key=lambda x: -x.get('bitrate', 0))[0]['url'] hq = sorted(vinfo.get('variants', []), key=lambda x: -x.get('bitrate', 0))[0]['url']
media[_id]['video']['video_info'] |= vinfo media[_id]['video']['video_info'] |= vinfo
media[_id]['video']['hq'].add(hq) media[_id]['video']['hq'].add(hq)
if video_thumb: if video_thumb:
if url := _media.get('media_url_https', ''): if url := _media.get('media_url_https', ''):
media[_id]['video']['thumb'].add(url) media[_id]['video']['thumb'].add(url)
if photos: if photos:
if (url := _media.get('media_url_https', '')) and "_video_thumb" not in url: if (url := _media.get('media_url_https', '')) and "_video_thumb" not in url:
if hq_img_variant: if hq_img_variant:
url = f'{url}?name=orig' url = f'{url}?name=orig'
media[_id]['img'].add(url) media[_id]['img'].add(url)
if cards: if cards:
if card := tweet.get('result', {}).get('card', {}).get('legacy', {}): if card := root.get('card', {}).get('legacy', {}):
media[_id]['card'].extend(card.get('binding_values', [])) media[_id]['card'].extend(card.get('binding_values', []))
if metadata_out: if metadata_out:
media = set2list(media) media = set2list(media)