update out dir

trevor hobenshield
2023-07-05 05:36:28 -07:00
parent 8c12082ce7
commit 2f17fd7675
3 changed files with 19 additions and 23 deletions

setup.py

@@ -1,8 +1,6 @@
-import os
-from codecs import open
 from textwrap import dedent
-
 from setuptools import find_packages, setup
+from pathlib import Path
 
 install_requires = [
     "aiofiles",
@@ -15,9 +13,7 @@ install_requires = [
 ]
 
 about = {}
-here = os.path.abspath(os.path.dirname(__file__))
-with open(os.path.join(here, "twitter", "__version__.py"), "r", "utf-8") as f:
-    exec(f.read(), about)
+exec((Path().cwd() / 'twitter' / '__version__.py').read_text(), about)
 
 setup(
     name=about['__title__'],
@@ -146,10 +142,10 @@ setup(
 # get bookmarks
 bookmarks = account.bookmarks()
 
-# get DM inbox metadata
+# get DM inbox metadata
 inbox = account.dm_inbox()
 
-# get DMs from all conversations
+# get DMs from all conversations
 dms = account.dm_history()
 
 # get DMs from specific conversations
@@ -311,7 +307,7 @@ setup(
 #### Search
 
-```python
+```python
 from twitter.search import Search
 
 email, username, password = ..., ..., ...
@@ -430,23 +426,23 @@ setup(
 ```
 
 ### Automated Solvers
 
 > This requires installation of the [proton-api-client](https://pypi.org/project/proton-api-client) package
 
 To set up automated email confirmation/verification solvers, add your Proton Mail credentials below as shown.
 This removes the need to manually solve email challenges via the web app. These credentials can be used
 in `Scraper`, `Account`, and `Search` constructors.
 
 E.g.
 
 ```python
 from twitter.account import Account
 from twitter.util import get_code
 from proton.client import ProtonMail
 
 proton_username, proton_password = ..., ...
 proton = lambda: get_code(ProtonMail(proton_username, proton_password))
 
 email, username, password = ..., ..., ...
 account = Account(email, username, password, proton=proton)
 ```

twitter/__version__.py

@@ -1,5 +1,5 @@
 __title__ = "twitter-api-client"
 __description__ = "Implementation of Twitter's v1, v2, and GraphQL APIs."
-__version__ = "0.10.9"
+__version__ = "0.10.10"
 __author__ = "Trevor Hobenshield"
 __license__ = "MIT"

twitter/scraper.py

@@ -34,7 +34,7 @@ class Scraper:
         self.save = kwargs.get('save', True)
         self.debug = kwargs.get('debug', 0)
         self.pbar = kwargs.get('pbar', True)
-        self.out_path = Path('data')
+        self.out = Path(kwargs.get('out', 'data'))
         self.guest = False
         self.logger = self._init_logger(**kwargs)
         self.session = self._validate_session(email, username, password, session, **kwargs)
@@ -306,7 +306,7 @@ class Scraper:
             return await asyncio.gather(*tasks)
 
         trends = asyncio.run(process())
-        out = self.out_path / 'raw' / 'trends'
+        out = self.out / 'raw' / 'trends'
         out.mkdir(parents=True, exist_ok=True)
         (out / f'{time.time_ns()}.json').write_text(orjson.dumps(
             {k: v for d in trends for k, v in d.items()},
@@ -441,7 +441,7 @@ class Scraper:
             info = await self._init_chat(c, key['chat_token'])
             chat = await self._get_chat(c, info['endpoint'], info['access_token'])
             if self.save:
-                (self.out_path / 'raw' / f"chat_{key['rest_id']}.json").write_bytes(orjson.dumps(chat))
+                (self.out / 'raw' / f"chat_{key['rest_id']}.json").write_bytes(orjson.dumps(chat))
             return {
                 'space': key['rest_id'],
                 'chat': chat,
@@ -449,7 +449,7 @@ class Scraper:
             }
 
         async def process():
-            (self.out_path / 'raw').mkdir(parents=True, exist_ok=True)
+            (self.out / 'raw').mkdir(parents=True, exist_ok=True)
             limits = Limits(max_connections=100, max_keepalive_connections=10)
             headers = self.session.headers if self.guest else get_headers(self.session)
             cookies = self.session.cookies
@@ -484,7 +484,7 @@ class Scraper:
         # ensure chunks are in correct order
         for k, v in streams.items():
             streams[k] = sorted(v, key=lambda x: int(re.findall('_(\d+)_\w\.aac$', x.url.path)[0]))
-        out = self.out_path / 'audio'
+        out = self.out / 'audio'
         out.mkdir(parents=True, exist_ok=True)
         for space_id, chunks in streams.items():
             # 1hr ~= 50mb
@@ -533,7 +533,7 @@ class Scraper:
         if self.debug:
             log(self.logger, self.debug, r)
         if self.save:
-            save_json(r, self.out_path, name, **kwargs)
+            save_json(r, self.out, name, **kwargs)
         return r
 
     async def _process(self, operation: tuple, queries: list[dict], **kwargs):
@@ -739,7 +739,7 @@ class Scraper:
             if not playlist: return
             chunks = await get_chunks(client, playlist['url'])
             if not chunks: return
-            out = self.out_path / 'live'
+            out = self.out / 'live'
             out.mkdir(parents=True, exist_ok=True)
             async with aiofiles.open(out / f'{playlist["room"]}.aac', 'wb') as fp:
                 while curr < lim:
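
The net effect on the public API: `Scraper` now reads an `out` keyword argument (falling back to the previous hard-coded `data` directory), so each instance can write its raw scrapes, audio, and live recordings under a caller-chosen path. A minimal usage sketch, assuming the positional credential signature shown in the README examples; the directory name is illustrative:

```python
from twitter.scraper import Scraper

email, username, password = ..., ..., ...

# results that previously always landed under 'data/' are now written
# under the directory passed via `out` ('my_data' here is illustrative)
scraper = Scraper(email, username, password, out='my_data')
```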