update download_media

2025-12-25 02:03:15 -05:00 · 2023-11-08 16:58:23 -08:00
parent 87b00bdcc7
commit 8abfc00077
4 changed files with 23 additions and 14 deletions
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@ setup(
    license=about['__license__'],
    long_description=dedent('''

-    ## Implementation of Twitter's v1, v2, and GraphQL APIs
+    ## Implementation of X/Twitter v1, v2, and GraphQL APIs.


    ## Table of Contents
--- a/twitter/version.py
+++ b/twitter/version.py
@@ -1,5 +1,5 @@
 __title__ = "twitter-api-client"
-__description__ = "Implementation of Twitter's v1, v2, and GraphQL APIs."
-__version__ = "0.10.11"
+__description__ = "Implementation of X/Twitter v1, v2, and GraphQL APIs."
+__version__ = "0.10.12"
 __author__ = "Trevor Hobenshield"
 __license__ = "MIT"
--- a/twitter/scraper.py
+++ b/twitter/scraper.py
@@ -230,8 +230,8 @@ class Scraper:
        """
        return self._run(Operation.UserByRestId, user_ids, **kwargs)

-    def download_media(self, ids: list[int], photos: bool = True, videos: bool = True,
-                       chunk_size: int = 8192) -> None:
+    def download_media(self, ids: list[int], photos: bool = True, videos: bool = True, chunk_size: int = 8192,
+                       stream: bool = False) -> None:
        """
        Download media from tweets by tweet ids.

@@ -239,6 +239,7 @@ class Scraper:
        @param photos: flag to include photos
        @param videos: flag to include videos
        @param chunk_size: chunk size for download
+        @param stream: flag to enable downloading raw stream
        @return: None
        """
        out = Path('media')
@@ -259,19 +260,27 @@ class Scraper:

        async def process():
            async with AsyncClient(headers=self.session.headers, cookies=self.session.cookies) as client:
-                tasks = (download(client, x, y) for x, y in urls)
+                tasks = (download(client, x, y, stream) for x, y in urls)
                if self.pbar:
                    return await tqdm_asyncio.gather(*tasks, desc='Downloading media')
                return await asyncio.gather(*tasks)

-        async def download(client: AsyncClient, post_url: str, cdn_url: str) -> None:
-            name = urlsplit(post_url).path.replace('/', '_')[1:]
-            ext = urlsplit(cdn_url).path.split('/')[-1]
+        async def download(client: AsyncClient, post_url: str, cdn_url: str, stream: bool = False) -> None:
            try:
-                r = await client.get(cdn_url)
-                async with aiofiles.open(out / f'{name}_{ext}', 'wb') as fp:
-                    for chunk in r.iter_bytes(chunk_size=chunk_size):
-                        await fp.write(chunk)
+                name = urlsplit(post_url).path.replace('/', '_')[1:]
+                ext = urlsplit(cdn_url).path.split('/')[-1]
+                fname = out / f'{name}_{ext}'
+                if stream:
+                    async with aiofiles.open(fname, 'wb') as fp:
+                        async with client.stream('GET', cdn_url) as r:
+                            async for chunk in r.aiter_raw(chunk_size):
+                                await fp.write(chunk)
+                else:
+                    r = await client.get(cdn_url)
+                    async with aiofiles.open(fname, 'wb') as fp:
+                        for chunk in r.iter_bytes(chunk_size):
+                            await fp.write(chunk)
+
            except Exception as e:
                self.logger.error(f'[{RED}error{RESET}] Failed to download media: {post_url} {e}')

--- a/twitter/util.py
+++ b/twitter/util.py
@@ -231,7 +231,7 @@ def get_code(cls, retries=5) -> str | None:
    def poll_inbox():
        inbox = cls.inbox()
        for c in inbox.get('Conversations', []):
-            if c['Senders'][0]['Address'] == 'info@twitter.com':
+            if c['Senders'][0]['Address'] in {'info@twitter.com', 'info@x.com'}:
                exprs = ['Your Twitter confirmation code is (.+)', '(.+) is your Twitter verification code']
                if temp := list(filter(None, (re.search(expr, c['Subject']) for expr in exprs))):
                    return temp[0].group(1)