diff --git a/examples/search.ipynb b/examples/search.ipynb new file mode 100644 index 0000000..6209310 --- /dev/null +++ b/examples/search.ipynb @@ -0,0 +1,344 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "d815a387", + "metadata": {}, + "outputs": [], + "source": [ + "# !pip uninstall twitter-api-client -y\n", + "# !pip install twitter-api-client --no-cache-dir" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1ecf8cb", + "metadata": {}, + "outputs": [], + "source": [ + "from twitter.search import Search\n", + "import pandas as pd\n", + "\n", + "email, username, password = ..., ..., ...\n", + "search = Search(email, username, password)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "98c65601", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2023-05-18 21:20:12,075.075 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n", + "2023-05-18 21:20:12,656.656 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n", + "2023-05-18 21:20:13,452.452 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n", + "2023-05-18 21:20:13,899.899 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n", + "2023-05-18 21:20:14,539.539 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n", + "2023-05-18 21:20:14,938.938 DEBUG: [\u001B[32msuccess\u001B[0m] returned 101 search results for \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
created_atiduser_idfull_textlanguser_urltweet_urlgeocoordinatesplace
02023-05-19 04:07:19+00:001659410380026773509809177430602776576@msdarlin_ JENNIFER HUDSON first considered al...enhttps://twitter.com/i/user/809177430602776576https://twitter.com/i/status/1659410380026773509NoneNoneNone
12023-05-19 03:50:16+00:00165940608857842892921226048Jennifer Hudson - Believe https://t.co/vjqlw52MjOenhttps://twitter.com/i/user/21226048https://twitter.com/i/status/1659406088578428929NoneNoneNone
22023-05-19 03:03:12+00:001659394245835255808174826024If Fantasia and Jennifer Hudson do this verzuz...enhttps://twitter.com/i/user/174826024https://twitter.com/i/status/1659394245835255808NoneNoneNone
32023-05-19 02:44:21+00:0016593894992211886091143382733001039873jennifer hudson acabou de postar uma foto e no...pthttps://twitter.com/i/user/1143382733001039873https://twitter.com/i/status/1659389499221188609NoneNoneNone
42023-05-19 02:41:35+00:0016593888051185786891342931884150464512Jennifer Hudsoncyhttps://twitter.com/i/user/1342931884150464512https://twitter.com/i/status/1659388805118578689NoneNoneNone
.................................
962023-05-17 20:57:29+00:001658939820574400516534285941I can’t 🤣🤣 https://t.co/2tiIyHrMb7enhttps://twitter.com/i/user/534285941https://twitter.com/i/status/1658939820574400516NoneNoneNone
972023-05-17 19:46:21+00:001658921918890758148417935020Idk ask her https://t.co/md7BJf59C2enhttps://twitter.com/i/user/417935020https://twitter.com/i/status/1658921918890758148NoneNoneNone
982023-05-17 19:09:50+00:0016589127309910097952384861195My best hip hop female Dj @ChainzMsDj Dancing ...enhttps://twitter.com/i/user/2384861195https://twitter.com/i/status/1658912730991009795NoneNoneNone
992023-05-17 16:56:52+00:00165887926923232051415733529Kelly will sing with D. Smooth\\n\\nThe Complete...enhttps://twitter.com/i/user/15733529https://twitter.com/i/status/1658879269232320514NoneNoneNone
1002023-05-17 14:20:31+00:00165883991927865344417230018my dream collab? gimme _____ and ______.enhttps://twitter.com/i/user/17230018https://twitter.com/i/status/1658839919278653444NoneNoneNone
\n", + "

101 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " created_at id user_id \n", + "0 2023-05-19 04:07:19+00:00 1659410380026773509 809177430602776576 \\\n", + "1 2023-05-19 03:50:16+00:00 1659406088578428929 21226048 \n", + "2 2023-05-19 03:03:12+00:00 1659394245835255808 174826024 \n", + "3 2023-05-19 02:44:21+00:00 1659389499221188609 1143382733001039873 \n", + "4 2023-05-19 02:41:35+00:00 1659388805118578689 1342931884150464512 \n", + ".. ... ... ... \n", + "96 2023-05-17 20:57:29+00:00 1658939820574400516 534285941 \n", + "97 2023-05-17 19:46:21+00:00 1658921918890758148 417935020 \n", + "98 2023-05-17 19:09:50+00:00 1658912730991009795 2384861195 \n", + "99 2023-05-17 16:56:52+00:00 1658879269232320514 15733529 \n", + "100 2023-05-17 14:20:31+00:00 1658839919278653444 17230018 \n", + "\n", + " full_text lang \n", + "0 @msdarlin_ JENNIFER HUDSON first considered al... en \\\n", + "1 Jennifer Hudson - Believe https://t.co/vjqlw52MjO en \n", + "2 If Fantasia and Jennifer Hudson do this verzuz... en \n", + "3 jennifer hudson acabou de postar uma foto e no... pt \n", + "4 Jennifer Hudson cy \n", + ".. ... ... \n", + "96 I can’t 🤣🤣 https://t.co/2tiIyHrMb7 en \n", + "97 Idk ask her https://t.co/md7BJf59C2 en \n", + "98 My best hip hop female Dj @ChainzMsDj Dancing ... en \n", + "99 Kelly will sing with D. Smooth\\n\\nThe Complete... en \n", + "100 my dream collab? gimme _____ and ______. en \n", + "\n", + " user_url \n", + "0 https://twitter.com/i/user/809177430602776576 \\\n", + "1 https://twitter.com/i/user/21226048 \n", + "2 https://twitter.com/i/user/174826024 \n", + "3 https://twitter.com/i/user/1143382733001039873 \n", + "4 https://twitter.com/i/user/1342931884150464512 \n", + ".. ... \n", + "96 https://twitter.com/i/user/534285941 \n", + "97 https://twitter.com/i/user/417935020 \n", + "98 https://twitter.com/i/user/2384861195 \n", + "99 https://twitter.com/i/user/15733529 \n", + "100 https://twitter.com/i/user/17230018 \n", + "\n", + " tweet_url geo coordinates place \n", + "0 https://twitter.com/i/status/1659410380026773509 None None None \n", + "1 https://twitter.com/i/status/1659406088578428929 None None None \n", + "2 https://twitter.com/i/status/1659394245835255808 None None None \n", + "3 https://twitter.com/i/status/1659389499221188609 None None None \n", + "4 https://twitter.com/i/status/1659388805118578689 None None None \n", + ".. ... ... ... ... \n", + "96 https://twitter.com/i/status/1658939820574400516 None None None \n", + "97 https://twitter.com/i/status/1658921918890758148 None None None \n", + "98 https://twitter.com/i/status/1658912730991009795 None None None \n", + "99 https://twitter.com/i/status/1658879269232320514 None None None \n", + "100 https://twitter.com/i/status/1658839919278653444 None None None \n", + "\n", + "[101 rows x 10 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "latest_results = search.run(\n", + " 'jennifer hudson since:2023-05-18',\n", + " limit=100,\n", + " latest=True, # get latest tweets only\n", + " retries=3,\n", + ")\n", + "\n", + "flat_results = [y for x in latest_results for y in x]\n", + "data = [r.get('globalObjects', {}).get('tweets', {})for r in flat_results]\n", + "\n", + "base= 'https://twitter.com/i'\n", + "\n", + "df = (\n", + " pd.DataFrame({k:v for d in data for k,v in d.items()})\n", + " .T\n", + " .assign(created_at = lambda x: pd.to_datetime(x['created_at'], format='%a %b %d %H:%M:%S %z %Y'))\n", + " .assign(user_url = lambda x: f\"{base}/user/\"+x['user_id_str'])\n", + " .assign(tweet_url = lambda x: f\"{base}/status/\"+x['id_str'] )\n", + " .sort_values('created_at',ascending=False)\n", + " .drop_duplicates('id')\n", + " .reset_index(drop=True)\n", + ")\n", + "\n", + "# sample df with a few cols of interest\n", + "sample = df[['created_at','id','user_id','full_text', 'lang',\n", + " 'user_url', 'tweet_url', 'geo', 'coordinates', 'place']]\n", + "\n", + "sample" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/readme.md b/readme.md index f944df8..0860f31 100644 --- a/readme.md +++ b/readme.md @@ -235,6 +235,7 @@ latest_results = search.run( 'ios android', limit=100, latest=True, # get latest tweets only + retries=3, ) general_results = search.run( @@ -246,6 +247,7 @@ general_results = search.run( 'cheese bread butter', 'ios android', limit=100, + retries=11, ) ``` diff --git a/setup.py b/setup.py index 9aa65d0..f610bfc 100644 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ install_requires = [ setup( name="twitter-api-client", - version="0.7.9", + version="0.8.0", python_requires=">=3.10.10", description="Twitter API", long_description=dedent(''' @@ -237,6 +237,7 @@ setup( 'ios android', limit=100, latest=True, # get latest tweets only + retries=3, ) general_results = search.run( @@ -248,6 +249,7 @@ setup( 'cheese bread butter', 'ios android', limit=100, + retries=11, ) ``` '''), diff --git a/twitter/search.py b/twitter/search.py index f471224..f534a3c 100644 --- a/twitter/search.py +++ b/twitter/search.py @@ -53,7 +53,7 @@ class Search: async def paginate(self, query: str, session: AsyncClient, config: dict, out: Path, **kwargs) -> list[ dict]: config['q'] = query - r, data, next_cursor = await self.backoff(lambda: self.get(session, config), query) + r, data, next_cursor = await self.backoff(lambda: self.get(session, config), query, **kwargs) all_data = [data] c = colors.pop() if colors else '' ids = set() @@ -65,7 +65,9 @@ class Search: logger.debug(f'{c}{query}{reset}') config['cursor'] = next_cursor - r, data, next_cursor = await self.backoff(lambda: self.get(session, config), query) + r, data, next_cursor = await self.backoff(lambda: self.get(session, config), query, **kwargs) + if r is None: + return all_data data['query'] = query (out / f'raw/{time.time_ns()}.json').write_text( orjson.dumps(data, option=orjson.OPT_INDENT_2).decode(), @@ -74,7 +76,8 @@ class Search: all_data.append(data) return all_data - async def backoff(self, fn, info, retries=12): + async def backoff(self, fn, info, **kwargs): + retries = kwargs.get('retries', 3) for i in range(retries + 1): try: r, data, next_cursor = await fn() @@ -84,7 +87,7 @@ class Search: except Exception as e: if i == retries: logger.debug(f'Max retries exceeded\n{e}') - return + return None, None, None t = 2 ** i + random.random() logger.debug(f'No data for: \u001b[1m{info}\u001b[0m | retrying in {f"{t:.2f}"} seconds\t\t{e}') time.sleep(t)