update download_media

This commit is contained in:
Trevor Hobenshield
2023-11-08 16:58:23 -08:00
parent 87b00bdcc7
commit 8abfc00077
4 changed files with 23 additions and 14 deletions

View File

@@ -23,7 +23,7 @@ setup(
license=about['__license__'],
long_description=dedent('''
## Implementation of Twitter's v1, v2, and GraphQL APIs
## Implementation of X/Twitter v1, v2, and GraphQL APIs.
## Table of Contents

View File

@@ -1,5 +1,5 @@
__title__ = "twitter-api-client"
__description__ = "Implementation of Twitter's v1, v2, and GraphQL APIs."
__version__ = "0.10.11"
__description__ = "Implementation of X/Twitter v1, v2, and GraphQL APIs."
__version__ = "0.10.12"
__author__ = "Trevor Hobenshield"
__license__ = "MIT"

View File

@@ -230,8 +230,8 @@ class Scraper:
"""
return self._run(Operation.UserByRestId, user_ids, **kwargs)
def download_media(self, ids: list[int], photos: bool = True, videos: bool = True,
chunk_size: int = 8192) -> None:
def download_media(self, ids: list[int], photos: bool = True, videos: bool = True, chunk_size: int = 8192,
stream: bool = False) -> None:
"""
Download media from tweets by tweet ids.
@@ -239,6 +239,7 @@ class Scraper:
@param photos: flag to include photos
@param videos: flag to include videos
@param chunk_size: chunk size for download
@param stream: flag to stream the raw response to disk chunk by chunk instead of fetching the full response first
@return: None
"""
out = Path('media')
@@ -259,19 +260,27 @@ class Scraper:
async def process():
async with AsyncClient(headers=self.session.headers, cookies=self.session.cookies) as client:
tasks = (download(client, x, y) for x, y in urls)
tasks = (download(client, x, y, stream) for x, y in urls)
if self.pbar:
return await tqdm_asyncio.gather(*tasks, desc='Downloading media')
return await asyncio.gather(*tasks)
async def download(client: AsyncClient, post_url: str, cdn_url: str) -> None:
name = urlsplit(post_url).path.replace('/', '_')[1:]
ext = urlsplit(cdn_url).path.split('/')[-1]
async def download(client: AsyncClient, post_url: str, cdn_url: str, stream: bool = False) -> None:
try:
r = await client.get(cdn_url)
async with aiofiles.open(out / f'{name}_{ext}', 'wb') as fp:
for chunk in r.iter_bytes(chunk_size=chunk_size):
await fp.write(chunk)
name = urlsplit(post_url).path.replace('/', '_')[1:]
ext = urlsplit(cdn_url).path.split('/')[-1]
fname = out / f'{name}_{ext}'
if stream:
async with aiofiles.open(fname, 'wb') as fp:
async with client.stream('GET', cdn_url) as r:
async for chunk in r.aiter_raw(chunk_size):
await fp.write(chunk)
else:
r = await client.get(cdn_url)
async with aiofiles.open(fname, 'wb') as fp:
for chunk in r.iter_bytes(chunk_size):
await fp.write(chunk)
except Exception as e:
self.logger.error(f'[{RED}error{RESET}] Failed to download media: {post_url} {e}')

View File

@@ -231,7 +231,7 @@ def get_code(cls, retries=5) -> str | None:
def poll_inbox():
inbox = cls.inbox()
for c in inbox.get('Conversations', []):
if c['Senders'][0]['Address'] == 'info@twitter.com':
if c['Senders'][0]['Address'] in {'info@twitter.com', 'info@x.com'}:
exprs = ['Your Twitter confirmation code is (.+)', '(.+) is your Twitter verification code']
if temp := list(filter(None, (re.search(expr, c['Subject']) for expr in exprs))):
return temp[0].group(1)