diff --git a/examples/chat_log.ipynb b/examples/chat_log.ipynb deleted file mode 100644 index 94351db..0000000 --- a/examples/chat_log.ipynb +++ /dev/null @@ -1,401 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "> Note: structure of GraphQL response is not consistent, these examples may not work in all cases." - ], - "metadata": { - "collapsed": false - }, - "id": "fce8131509380867" - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "6c99787b", - "metadata": {}, - "outputs": [], - "source": [ - "import re\n", - "import pandas as pd\n", - "from twitter.scraper import Scraper\n", - "from twitter.util import *" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dd3b7a57", - "metadata": {}, - "outputs": [], - "source": [ - "scraper = Scraper(session=init_session())" - ] - }, - { - "cell_type": "markdown", - "id": "17a91f72", - "metadata": {}, - "source": [ - "### get chat log" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "597b3a0f", - "metadata": {}, - "outputs": [], - "source": [ - "room_id = '1eaJbrAPnBVJX'\n", - "spaces = scraper.spaces(rooms=[room_id], audio=0, chat=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "faaa76b1", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
roomtimestamptwitter_idusernamebody
01eaJbrAPnBVJX2023-05-25 21:32:191106321031566893057jimfarley98
11eaJbrAPnBVJX2023-05-25 21:32:551106321031566893057jimfarley98
21eaJbrAPnBVJX2023-05-25 21:33:011106321031566893057jimfarley98
31eaJbrAPnBVJX2023-05-25 21:33:131106321031566893057jimfarley98Hi Alan, are you there?
41eaJbrAPnBVJX2023-05-25 21:33:1744196397elonmuskI am.
..................
2511eaJbrAPnBVJX2023-05-25 22:00:001106321031566893057jimfarley98Yeah, it's a great platform.
2521eaJbrAPnBVJX2023-05-25 22:00:0344196397elonmuskAlright, cool. Sounds good. Thank you.
2531eaJbrAPnBVJX2023-05-25 22:00:031106321031566893057jimfarley98OK. Thank you.
2541eaJbrAPnBVJX2023-05-25 22:00:0444196397elonmuskRight.
2551eaJbrAPnBVJX2023-05-25 22:00:051106321031566893057jimfarley98Bye.
\n", - "

256 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " room timestamp twitter_id username \n", - "0 1eaJbrAPnBVJX 2023-05-25 21:32:19 1106321031566893057 jimfarley98 \\\n", - "1 1eaJbrAPnBVJX 2023-05-25 21:32:55 1106321031566893057 jimfarley98 \n", - "2 1eaJbrAPnBVJX 2023-05-25 21:33:01 1106321031566893057 jimfarley98 \n", - "3 1eaJbrAPnBVJX 2023-05-25 21:33:13 1106321031566893057 jimfarley98 \n", - "4 1eaJbrAPnBVJX 2023-05-25 21:33:17 44196397 elonmusk \n", - ".. ... ... ... ... \n", - "251 1eaJbrAPnBVJX 2023-05-25 22:00:00 1106321031566893057 jimfarley98 \n", - "252 1eaJbrAPnBVJX 2023-05-25 22:00:03 44196397 elonmusk \n", - "253 1eaJbrAPnBVJX 2023-05-25 22:00:03 1106321031566893057 jimfarley98 \n", - "254 1eaJbrAPnBVJX 2023-05-25 22:00:04 44196397 elonmusk \n", - "255 1eaJbrAPnBVJX 2023-05-25 22:00:05 1106321031566893057 jimfarley98 \n", - "\n", - " body \n", - "0 \n", - "1 \n", - "2 \n", - "3 Hi Alan, are you there? \n", - "4 I am. \n", - ".. ... \n", - "251 Yeah, it's a great platform. \n", - "252 Alright, cool. Sounds good. Thank you. \n", - "253 OK. Thank you. \n", - "254 Right. \n", - "255 Bye. \n", - "\n", - "[256 rows x 5 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "chat = pd.json_normalize(spaces[0]['chat'])\n", - "chat = chat[chat['payload.body.final'] == True]\n", - "dates = ['payload.body.timestamp']\n", - "chat[dates] = chat[dates].apply(pd.to_datetime, unit='ms').apply(lambda x: x.dt.strftime(\"%Y-%m-%d %H:%M:%S %z\"))\n", - "chat = chat.sort_values('payload.body.timestamp').reset_index(drop=True)\n", - "chat = chat[[\n", - " 'payload.room',\n", - " 'payload.body.timestamp',\n", - " 'payload.sender.twitter_id',\n", - " 'payload.body.username',\n", - " 'payload.body.body',\n", - "]]\n", - "chat.columns = chat.columns.str.replace('(payload|body|sender).','',regex=True).str.replace('.','_')\n", - "# chat.to_csv(f'{room_id}.csv',index=False)\n", - "chat" - ] - }, - { - "cell_type": "markdown", - "id": "118919dd", - "metadata": {}, - "source": [ - "### query chat" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "3010c52a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
roomtimestamptwitter_idusernamebody
221eaJbrAPnBVJX2023-05-25 21:35:0544196397elonmuskYeah, well, well, it's certainly super excitin...
301eaJbrAPnBVJX2023-05-25 21:36:0044196397elonmuskAPI access so like you know like a Ford vehicl...
331eaJbrAPnBVJX2023-05-25 21:36:2344196397elonmuskAnd we're very, very much appreciative of of F...
471eaJbrAPnBVJX2023-05-25 21:38:2944196397elonmuskBut but I think it is the the the teams have d...
1911eaJbrAPnBVJX2023-05-25 21:53:5244196397elonmuskYeah, I agree with that. We should probably no...
2111eaJbrAPnBVJX2023-05-25 21:56:1344196397elonmuskSo, you know, it is certainly the Tesla intent...
2481eaJbrAPnBVJX2023-05-25 21:59:4544196397elonmuskLikewise there, it's an honor to be working wi...
\n", - "
" - ], - "text/plain": [ - " room timestamp twitter_id username \n", - "22 1eaJbrAPnBVJX 2023-05-25 21:35:05 44196397 elonmusk \\\n", - "30 1eaJbrAPnBVJX 2023-05-25 21:36:00 44196397 elonmusk \n", - "33 1eaJbrAPnBVJX 2023-05-25 21:36:23 44196397 elonmusk \n", - "47 1eaJbrAPnBVJX 2023-05-25 21:38:29 44196397 elonmusk \n", - "191 1eaJbrAPnBVJX 2023-05-25 21:53:52 44196397 elonmusk \n", - "211 1eaJbrAPnBVJX 2023-05-25 21:56:13 44196397 elonmusk \n", - "248 1eaJbrAPnBVJX 2023-05-25 21:59:45 44196397 elonmusk \n", - "\n", - " body \n", - "22 Yeah, well, well, it's certainly super excitin... \n", - "30 API access so like you know like a Ford vehicl... \n", - "33 And we're very, very much appreciative of of F... \n", - "47 But but I think it is the the the teams have d... \n", - "191 Yeah, I agree with that. We should probably no... \n", - "211 So, you know, it is certainly the Tesla intent... \n", - "248 Likewise there, it's an honor to be working wi... " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "flags = re.I\n", - "(chat\n", - " .query('body.str.contains(\"\\sford\",regex=True,flags=@flags)')\n", - " .query('username.str.contains(\"elonmusk\",regex=True,flags=@flags)')\n", - ")\n", - "\n", - "## alternatively\n", - "# chat[\n", - "# chat.body.str.contains('\\sford',regex=True,flags=re.I)\n", - "# &\n", - "# chat.username.str.contains('elonmusk',regex=True,flags=re.I)\n", - "# ]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/example.ipynb b/examples/example.ipynb new file mode 100644 index 0000000..6e88ad9 --- /dev/null +++ b/examples/example.ipynb @@ -0,0 +1,1584 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "b3505281", + "metadata": {}, + "source": [ + "### Parsing X data\n", + "\n", + "This example is to help illustrate the structure of the response data\n", + "\n", + "**Notes**:\n", + "- The code is extremely slow due to constant calls to `.apply()`\n", + "- The GraphQL response data is not consistent, the following examples may not work in all cases." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "411feb89", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from twitter.scraper import Scraper\n", + "\n", + "scraper = Scraper(\n", + " cookies={\n", + " 'auth_token':...,\n", + " 'ct0':...,\n", + " }\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "id": "e6f3ab15", + "metadata": {}, + "source": [ + "### Likes" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "5e8b7adb", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Likes: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:02<00:00, 2.40s/it]\n", + "/tmp/ipykernel_11828/237363432.py:20: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " .assign(created_at=lambda x:pd.to_datetime(x['created_at']))\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_id_strid_strcreated_atbookmark_countfavorite_countfull_textpossibly_sensitivequote_countreply_countretweet_count
0137695187235602432517763200044655823312024-04-05 18:44:26+00:00248338Schedule-Free Learning\\nhttps://t.co/HEl6dOcT2...False101356
1327890640117762714593522280622024-04-05 15:31:32+00:00474We’ve teamed up with PayPal to offer MetaMask ...NaN063
2455831492717762327143638715862024-04-05 12:57:34+00:0029309I like to think of myself as a researcher, but...NaN277
31323583217760808840197001082024-04-05 02:54:15+00:0040932Someone should make a leaderboard for how freq...NaN114758
4170311784893009920017760415617108378332024-04-05 00:18:00+00:0093311\"Young woman with blonde hair wearing a backpa...NaN84011
.................................
1123479236917408804127953797852023-12-29 23:40:09+00:0010377Dive into the best of 2023 with Porch Potty on...False11399
113232952671417399587542196310312023-12-27 10:37:48+00:00107820857What happened in 1970? \\n\\nNixon signed the 19...False3577397573
114577871216994467038570455402023-09-06 15:37:23+00:00135784another way of thinking about this — much of m...False144634
1151857077216420441213419192322023-04-01 06:00:00+00:0058011854As Beyoncé once said, “If you like it, then yo...NaN1237772321
1162987366213836162746939514942021-04-18 03:00:00+00:0026467730Never. Ever. Buy a tech product based on the p...NaN51314354066
\n", + "

117 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " user_id_str id_str created_at \\\n", + "0 1376951872356024325 1776320004465582331 2024-04-05 18:44:26+00:00 \n", + "1 3278906401 1776271459352228062 2024-04-05 15:31:32+00:00 \n", + "2 4558314927 1776232714363871586 2024-04-05 12:57:34+00:00 \n", + "3 13235832 1776080884019700108 2024-04-05 02:54:15+00:00 \n", + "4 1703117848930099200 1776041561710837833 2024-04-05 00:18:00+00:00 \n", + ".. ... ... ... \n", + "112 34792369 1740880412795379785 2023-12-29 23:40:09+00:00 \n", + "113 2329526714 1739958754219631031 2023-12-27 10:37:48+00:00 \n", + "114 5778712 1699446703857045540 2023-09-06 15:37:23+00:00 \n", + "115 18570772 1642044121341919232 2023-04-01 06:00:00+00:00 \n", + "116 29873662 1383616274693951494 2021-04-18 03:00:00+00:00 \n", + "\n", + " bookmark_count favorite_count \\\n", + "0 248 338 \n", + "1 4 74 \n", + "2 29 309 \n", + "3 40 932 \n", + "4 93 311 \n", + ".. ... ... \n", + "112 10 377 \n", + "113 1078 20857 \n", + "114 135 784 \n", + "115 580 11854 \n", + "116 264 67730 \n", + "\n", + " full_text possibly_sensitive \\\n", + "0 Schedule-Free Learning\\nhttps://t.co/HEl6dOcT2... False \n", + "1 We’ve teamed up with PayPal to offer MetaMask ... NaN \n", + "2 I like to think of myself as a researcher, but... NaN \n", + "3 Someone should make a leaderboard for how freq... NaN \n", + "4 \"Young woman with blonde hair wearing a backpa... NaN \n", + ".. ... ... \n", + "112 Dive into the best of 2023 with Porch Potty on... False \n", + "113 What happened in 1970? \\n\\nNixon signed the 19... False \n", + "114 another way of thinking about this — much of m... False \n", + "115 As Beyoncé once said, “If you like it, then yo... NaN \n", + "116 Never. Ever. Buy a tech product based on the p... NaN \n", + "\n", + " quote_count reply_count retweet_count \n", + "0 10 13 56 \n", + "1 0 6 3 \n", + "2 2 7 7 \n", + "3 11 47 58 \n", + "4 8 40 11 \n", + ".. ... ... ... \n", + "112 1 13 99 \n", + "113 357 739 7573 \n", + "114 14 46 34 \n", + "115 123 777 2321 \n", + "116 513 1435 4066 \n", + "\n", + "[117 rows x 10 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "likes = scraper.likes([33836629],count=100,limit=100)\n", + "\n", + "df_likes = (\n", + " pd.json_normalize(likes, record_path=['data','user','result','timeline_v2','timeline','instructions'])\n", + " ['entries']\n", + " .explode()\n", + " .apply(pd.Series)\n", + " ['content']\n", + " .apply(pd.Series)\n", + " .pipe(lambda df:df[df['__typename']!='TimelineTimelineCursor'])\n", + " ['itemContent']\n", + " .apply(pd.Series)\n", + " ['tweet_results']\n", + " .apply(pd.Series)\n", + " ['result']\n", + " .apply(pd.Series)\n", + " .pipe(lambda df:df[df['__typename']!='TweetWithVisibilityResults'])\n", + " ['legacy']\n", + " .apply(pd.Series)\n", + " .assign(created_at=lambda x:pd.to_datetime(x['created_at']))\n", + " .sort_values('created_at', ascending=False)\n", + " .reset_index(drop=True)\n", + " [[\n", + " 'user_id_str',\n", + " 'id_str',\n", + " 'created_at',\n", + " 'bookmark_count',\n", + " 'favorite_count',\n", + " 'full_text',\n", + " 'possibly_sensitive',\n", + " 'quote_count',\n", + " 'reply_count',\n", + " 'retweet_count'\n", + " ]]\n", + ")\n", + "df_likes\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "57dc49fa", + "metadata": {}, + "source": [ + "### Tweet Details" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b8abf4e7", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "TweetDetail: 100%|██████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00, 4.06s/it]\n", + "/tmp/ipykernel_11828/4204966796.py:28: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " .assign(created_at=lambda x:pd.to_datetime(x['created_at']))\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
created_atid_struser_id_strfull_textfavorite_counturlshashtagsuser_mentions
02024-04-05 16:27:24+00:001776285521603363107307810043Integrate Advanced Image and Video Customizati...0[][{'indices': [105, 112], 'text': 'nodejs'}, {'...[]
12024-04-05 16:27:24+00:001776285521569865904307810043Integrate Advanced Image and Video Customizati...0[][{'indices': [105, 112], 'text': 'nodejs'}, {'...[]
22024-04-05 16:27:24+00:001776285521569865904307810043Integrate Advanced Image and Video Customizati...0[][{'indices': [105, 112], 'text': 'nodejs'}, {'...[]
32024-04-05 16:27:24+00:001776285521603363107307810043Integrate Advanced Image and Video Customizati...0[][{'indices': [105, 112], 'text': 'nodejs'}, {'...[]
42024-04-04 18:39:56+00:0017759564853248493191594005431164321793Embark on a magical morning adventure with eve...85[][][]
52024-04-01 17:24:10+00:0017748502544684362771773621435199586304@karpathy @__tinygrad__ @realGeorgeHotz Recent...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
62024-04-01 02:46:28+00:0017746293751215391261371119512313950210@karpathy @__tinygrad__ @realGeorgeHotz yeah I...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
72024-03-31 00:20:12+00:0017742301752381974561316295394146410497@karpathy @__tinygrad__ @realGeorgeHotz Colab ...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
82024-03-29 23:47:06+00:001773859460139855938911238967030403073@karpathy @__tinygrad__ @realGeorgeHotz I was ...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
92024-03-29 01:23:42+00:0017735213821877170671231466530375970816@karpathy @__tinygrad__ @realGeorgeHotz 👀0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
102024-03-25 13:33:50+00:0017722555740061533651635022673167122433@karpathy @__tinygrad__ @realGeorgeHotz Hi! I ...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
112024-03-24 13:01:04+00:00177188494251705969116598957@karpathy @__tinygrad__ @realGeorgeHotz What’s...3[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
122024-03-21 16:41:59+00:0017708533705777358633236488643@karpathy @__tinygrad__ @realGeorgeHotz Now th...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
132024-03-21 10:31:30+00:001770760137063494060120717378@karpathy @__tinygrad__ @realGeorgeHotz It wou...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
142024-03-20 20:47:15+00:0017705527071973133911478897931876921345@karpathy @__tinygrad__ @realGeorgeHotz Geohot...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
152024-03-20 15:29:54+00:0017704728425612780551745002150818639872@karpathy @__tinygrad__ @realGeorgeHotz cancel...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
162024-03-20 15:13:00+00:0017704685902541703381728044152389079040@karpathy @__tinygrad__ @realGeorgeHotz All I ...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
172024-03-20 13:42:26+00:0017704458008264049951435107115@karpathy @__tinygrad__ @realGeorgeHotz Maybe ...3[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
182024-03-20 12:01:00+00:001770420272707056005228920764@karpathy @__tinygrad__ @realGeorgeHotz Is Gho...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
192024-03-20 11:48:41+00:0017704171749825825191698500379086872576@karpathy @__tinygrad__ @realGeorgeHotz I wond...1[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
202024-03-20 07:46:32+00:0017703562323958911201324369511710154754@karpathy @__tinygrad__ @realGeorgeHotz Andrej...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
212024-03-20 07:41:46+00:0017703550356726624901203777178124591104@karpathy @__tinygrad__ @realGeorgeHotz go geo...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
222024-03-20 05:04:40+00:0017703154997772125773062411848@karpathy @__tinygrad__ @realGeorgeHotz make a...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
232024-03-20 04:21:56+00:00177030474427338369089958280@karpathy @__tinygrad__ @realGeorgeHotz @readw...1[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
242024-03-20 03:27:07+00:0017702909478860680101245941997497704450@karpathy @__tinygrad__ @realGeorgeHotz based ...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
252024-03-20 03:11:18+00:0017702869702598330041610077781945159681@karpathy @__tinygrad__ @realGeorgeHotz he's a...1[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
262024-03-20 02:49:20+00:00177028144098850854482467998@karpathy @__tinygrad__ @realGeorgeHotz @abaca...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
272024-03-20 02:38:31+00:0017702787199421687121724580443004215296@karpathy @__tinygrad__ @realGeorgeHotz I love...1[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
282024-03-20 00:41:02+00:0017702491550315479251765646589794607104@karpathy @__tinygrad__ @realGeorgeHotz I now ...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
292024-03-20 00:01:19+00:001770239159703122342246453725@karpathy @__tinygrad__ @realGeorgeHotz woo An...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
302024-03-19 21:45:05+00:001770204873331470581269968418@karpathy @__tinygrad__ @realGeorgeHotz 🙏0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
312024-03-19 20:42:21+00:0017701890883955388801263378123808075776@karpathy @__tinygrad__ @realGeorgeHotz Would ...4[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
322024-03-19 20:28:11+00:0017701855225660706721051571484466900992@karpathy @__tinygrad__ @realGeorgeHotz mainst...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
332024-03-19 20:14:52+00:0017701821690984163531283700032512770048@karpathy @__tinygrad__ @realGeorgeHotz He mak...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
342024-03-19 19:38:05+00:0017701729126553397331195245650746626048@karpathy @__tinygrad__ @realGeorgeHotz karpat...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
352024-03-19 19:33:41+00:0017701718046166428891187420166373687297@karpathy @__tinygrad__ @realGeorgeHotz Noted0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
362024-03-19 19:30:45+00:0017701710668343873611512519426679218185@karpathy @__tinygrad__ @realGeorgeHotz Nothin...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
372024-03-19 19:18:12+00:0017701679088589947961290932959084675073@karpathy @__tinygrad__ @realGeorgeHotz A true...2[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
382024-03-19 19:17:56+00:00177016784370472167623503850@karpathy @__tinygrad__ @realGeorgeHotz watchi...17[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
392024-03-19 19:17:24+00:001770167709470241108996745767384698885@karpathy @__tinygrad__ @realGeorgeHotz Andrej...2[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
402024-03-19 19:14:55+00:0017701670848292990801689409906594463744@karpathy @__tinygrad__ @realGeorgeHotz image ...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
412024-03-19 19:14:09+00:0017701668901171530881041076799386648576@karpathy @__tinygrad__ @realGeorgeHotz Taps s...0[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
422024-03-19 19:13:43+00:0017701667812332668254483798048@karpathy @__tinygrad__ @realGeorgeHotz Andrej...12[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
432024-03-19 19:13:17+00:00177016667306605819533836629@hiberfile_sys @__tinygrad__ @realGeorgeHotz l...170[][][{'id_str': '7780132', 'name': 'Charles Smith'...
442024-03-19 19:11:28+00:0017701662166083217447780132@karpathy @__tinygrad__ @realGeorgeHotz I was ...80[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
452024-03-19 19:11:01+00:0017701661006484236191488855033479376902@karpathy @__tinygrad__ @realGeorgeHotz that’s...1[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
462024-03-19 19:09:56+00:0017701658301955484521963466798@karpathy @__tinygrad__ @realGeorgeHotz I coul...122[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
472024-03-19 19:08:25+00:0017701654473604510431017486609707667456@karpathy @__tinygrad__ @realGeorgeHotz this b...10[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
482024-03-19 19:08:12+00:0017701653942857241121674187388825006082@karpathy @realGeorgeHotz Thanks!\\n\\nYea it's ...288[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
492024-03-19 19:07:06+00:0017701651180614082806470462@karpathy @__tinygrad__ @realGeorgeHotz You sh...22[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
502024-03-19 19:06:54+00:0017701650651587179461308474693339447296@karpathy @__tinygrad__ @realGeorgeHotz geohot...72[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
512024-03-19 19:06:18+00:0017701649157786052651518031440302284802@karpathy @__tinygrad__ @realGeorgeHotz Has an...1[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
522024-03-19 19:06:06+00:0017701648642179485251695180583695949824@karpathy @__tinygrad__ @realGeorgeHotz Who’s ...1[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
532024-03-19 19:04:54+00:00177016456193027300924418897@karpathy @__tinygrad__ @realGeorgeHotz 👀1[][][{'id_str': '33836629', 'name': 'Andrej Karpat...
\n", + "
" + ], + "text/plain": [ + " created_at id_str user_id_str \\\n", + "0 2024-04-05 16:27:24+00:00 1776285521603363107 307810043 \n", + "1 2024-04-05 16:27:24+00:00 1776285521569865904 307810043 \n", + "2 2024-04-05 16:27:24+00:00 1776285521569865904 307810043 \n", + "3 2024-04-05 16:27:24+00:00 1776285521603363107 307810043 \n", + "4 2024-04-04 18:39:56+00:00 1775956485324849319 1594005431164321793 \n", + "5 2024-04-01 17:24:10+00:00 1774850254468436277 1773621435199586304 \n", + "6 2024-04-01 02:46:28+00:00 1774629375121539126 1371119512313950210 \n", + "7 2024-03-31 00:20:12+00:00 1774230175238197456 1316295394146410497 \n", + "8 2024-03-29 23:47:06+00:00 1773859460139855938 911238967030403073 \n", + "9 2024-03-29 01:23:42+00:00 1773521382187717067 1231466530375970816 \n", + "10 2024-03-25 13:33:50+00:00 1772255574006153365 1635022673167122433 \n", + "11 2024-03-24 13:01:04+00:00 1771884942517059691 16598957 \n", + "12 2024-03-21 16:41:59+00:00 1770853370577735863 3236488643 \n", + "13 2024-03-21 10:31:30+00:00 1770760137063494060 120717378 \n", + "14 2024-03-20 20:47:15+00:00 1770552707197313391 1478897931876921345 \n", + "15 2024-03-20 15:29:54+00:00 1770472842561278055 1745002150818639872 \n", + "16 2024-03-20 15:13:00+00:00 1770468590254170338 1728044152389079040 \n", + "17 2024-03-20 13:42:26+00:00 1770445800826404995 1435107115 \n", + "18 2024-03-20 12:01:00+00:00 1770420272707056005 228920764 \n", + "19 2024-03-20 11:48:41+00:00 1770417174982582519 1698500379086872576 \n", + "20 2024-03-20 07:46:32+00:00 1770356232395891120 1324369511710154754 \n", + "21 2024-03-20 07:41:46+00:00 1770355035672662490 1203777178124591104 \n", + "22 2024-03-20 05:04:40+00:00 1770315499777212577 3062411848 \n", + "23 2024-03-20 04:21:56+00:00 1770304744273383690 89958280 \n", + "24 2024-03-20 03:27:07+00:00 1770290947886068010 1245941997497704450 \n", + "25 2024-03-20 03:11:18+00:00 1770286970259833004 1610077781945159681 \n", + "26 2024-03-20 02:49:20+00:00 1770281440988508544 82467998 \n", + "27 2024-03-20 02:38:31+00:00 1770278719942168712 1724580443004215296 \n", + "28 2024-03-20 00:41:02+00:00 1770249155031547925 1765646589794607104 \n", + "29 2024-03-20 00:01:19+00:00 1770239159703122342 246453725 \n", + "30 2024-03-19 21:45:05+00:00 1770204873331470581 269968418 \n", + "31 2024-03-19 20:42:21+00:00 1770189088395538880 1263378123808075776 \n", + "32 2024-03-19 20:28:11+00:00 1770185522566070672 1051571484466900992 \n", + "33 2024-03-19 20:14:52+00:00 1770182169098416353 1283700032512770048 \n", + "34 2024-03-19 19:38:05+00:00 1770172912655339733 1195245650746626048 \n", + "35 2024-03-19 19:33:41+00:00 1770171804616642889 1187420166373687297 \n", + "36 2024-03-19 19:30:45+00:00 1770171066834387361 1512519426679218185 \n", + "37 2024-03-19 19:18:12+00:00 1770167908858994796 1290932959084675073 \n", + "38 2024-03-19 19:17:56+00:00 1770167843704721676 23503850 \n", + "39 2024-03-19 19:17:24+00:00 1770167709470241108 996745767384698885 \n", + "40 2024-03-19 19:14:55+00:00 1770167084829299080 1689409906594463744 \n", + "41 2024-03-19 19:14:09+00:00 1770166890117153088 1041076799386648576 \n", + "42 2024-03-19 19:13:43+00:00 1770166781233266825 4483798048 \n", + "43 2024-03-19 19:13:17+00:00 1770166673066058195 33836629 \n", + "44 2024-03-19 19:11:28+00:00 1770166216608321744 7780132 \n", + "45 2024-03-19 19:11:01+00:00 1770166100648423619 1488855033479376902 \n", + "46 2024-03-19 19:09:56+00:00 1770165830195548452 1963466798 \n", + "47 2024-03-19 19:08:25+00:00 1770165447360451043 1017486609707667456 \n", + "48 2024-03-19 19:08:12+00:00 1770165394285724112 1674187388825006082 \n", + "49 2024-03-19 19:07:06+00:00 1770165118061408280 6470462 \n", + "50 2024-03-19 19:06:54+00:00 1770165065158717946 1308474693339447296 \n", + "51 2024-03-19 19:06:18+00:00 1770164915778605265 1518031440302284802 \n", + "52 2024-03-19 19:06:06+00:00 1770164864217948525 1695180583695949824 \n", + "53 2024-03-19 19:04:54+00:00 1770164561930273009 24418897 \n", + "\n", + " full_text favorite_count urls \\\n", + "0 Integrate Advanced Image and Video Customizati... 0 [] \n", + "1 Integrate Advanced Image and Video Customizati... 0 [] \n", + "2 Integrate Advanced Image and Video Customizati... 0 [] \n", + "3 Integrate Advanced Image and Video Customizati... 0 [] \n", + "4 Embark on a magical morning adventure with eve... 85 [] \n", + "5 @karpathy @__tinygrad__ @realGeorgeHotz Recent... 0 [] \n", + "6 @karpathy @__tinygrad__ @realGeorgeHotz yeah I... 0 [] \n", + "7 @karpathy @__tinygrad__ @realGeorgeHotz Colab ... 0 [] \n", + "8 @karpathy @__tinygrad__ @realGeorgeHotz I was ... 0 [] \n", + "9 @karpathy @__tinygrad__ @realGeorgeHotz 👀 0 [] \n", + "10 @karpathy @__tinygrad__ @realGeorgeHotz Hi! I ... 0 [] \n", + "11 @karpathy @__tinygrad__ @realGeorgeHotz What’s... 3 [] \n", + "12 @karpathy @__tinygrad__ @realGeorgeHotz Now th... 0 [] \n", + "13 @karpathy @__tinygrad__ @realGeorgeHotz It wou... 0 [] \n", + "14 @karpathy @__tinygrad__ @realGeorgeHotz Geohot... 0 [] \n", + "15 @karpathy @__tinygrad__ @realGeorgeHotz cancel... 0 [] \n", + "16 @karpathy @__tinygrad__ @realGeorgeHotz All I ... 0 [] \n", + "17 @karpathy @__tinygrad__ @realGeorgeHotz Maybe ... 3 [] \n", + "18 @karpathy @__tinygrad__ @realGeorgeHotz Is Gho... 0 [] \n", + "19 @karpathy @__tinygrad__ @realGeorgeHotz I wond... 1 [] \n", + "20 @karpathy @__tinygrad__ @realGeorgeHotz Andrej... 0 [] \n", + "21 @karpathy @__tinygrad__ @realGeorgeHotz go geo... 0 [] \n", + "22 @karpathy @__tinygrad__ @realGeorgeHotz make a... 0 [] \n", + "23 @karpathy @__tinygrad__ @realGeorgeHotz @readw... 1 [] \n", + "24 @karpathy @__tinygrad__ @realGeorgeHotz based ... 0 [] \n", + "25 @karpathy @__tinygrad__ @realGeorgeHotz he's a... 1 [] \n", + "26 @karpathy @__tinygrad__ @realGeorgeHotz @abaca... 0 [] \n", + "27 @karpathy @__tinygrad__ @realGeorgeHotz I love... 1 [] \n", + "28 @karpathy @__tinygrad__ @realGeorgeHotz I now ... 0 [] \n", + "29 @karpathy @__tinygrad__ @realGeorgeHotz woo An... 0 [] \n", + "30 @karpathy @__tinygrad__ @realGeorgeHotz 🙏 0 [] \n", + "31 @karpathy @__tinygrad__ @realGeorgeHotz Would ... 4 [] \n", + "32 @karpathy @__tinygrad__ @realGeorgeHotz mainst... 0 [] \n", + "33 @karpathy @__tinygrad__ @realGeorgeHotz He mak... 0 [] \n", + "34 @karpathy @__tinygrad__ @realGeorgeHotz karpat... 0 [] \n", + "35 @karpathy @__tinygrad__ @realGeorgeHotz Noted 0 [] \n", + "36 @karpathy @__tinygrad__ @realGeorgeHotz Nothin... 0 [] \n", + "37 @karpathy @__tinygrad__ @realGeorgeHotz A true... 2 [] \n", + "38 @karpathy @__tinygrad__ @realGeorgeHotz watchi... 17 [] \n", + "39 @karpathy @__tinygrad__ @realGeorgeHotz Andrej... 2 [] \n", + "40 @karpathy @__tinygrad__ @realGeorgeHotz image ... 0 [] \n", + "41 @karpathy @__tinygrad__ @realGeorgeHotz Taps s... 0 [] \n", + "42 @karpathy @__tinygrad__ @realGeorgeHotz Andrej... 12 [] \n", + "43 @hiberfile_sys @__tinygrad__ @realGeorgeHotz l... 170 [] \n", + "44 @karpathy @__tinygrad__ @realGeorgeHotz I was ... 80 [] \n", + "45 @karpathy @__tinygrad__ @realGeorgeHotz that’s... 1 [] \n", + "46 @karpathy @__tinygrad__ @realGeorgeHotz I coul... 122 [] \n", + "47 @karpathy @__tinygrad__ @realGeorgeHotz this b... 10 [] \n", + "48 @karpathy @realGeorgeHotz Thanks!\\n\\nYea it's ... 288 [] \n", + "49 @karpathy @__tinygrad__ @realGeorgeHotz You sh... 22 [] \n", + "50 @karpathy @__tinygrad__ @realGeorgeHotz geohot... 72 [] \n", + "51 @karpathy @__tinygrad__ @realGeorgeHotz Has an... 1 [] \n", + "52 @karpathy @__tinygrad__ @realGeorgeHotz Who’s ... 1 [] \n", + "53 @karpathy @__tinygrad__ @realGeorgeHotz 👀 1 [] \n", + "\n", + " hashtags \\\n", + "0 [{'indices': [105, 112], 'text': 'nodejs'}, {'... \n", + "1 [{'indices': [105, 112], 'text': 'nodejs'}, {'... \n", + "2 [{'indices': [105, 112], 'text': 'nodejs'}, {'... \n", + "3 [{'indices': [105, 112], 'text': 'nodejs'}, {'... \n", + "4 [] \n", + "5 [] \n", + "6 [] \n", + "7 [] \n", + "8 [] \n", + "9 [] \n", + "10 [] \n", + "11 [] \n", + "12 [] \n", + "13 [] \n", + "14 [] \n", + "15 [] \n", + "16 [] \n", + "17 [] \n", + "18 [] \n", + "19 [] \n", + "20 [] \n", + "21 [] \n", + "22 [] \n", + "23 [] \n", + "24 [] \n", + "25 [] \n", + "26 [] \n", + "27 [] \n", + "28 [] \n", + "29 [] \n", + "30 [] \n", + "31 [] \n", + "32 [] \n", + "33 [] \n", + "34 [] \n", + "35 [] \n", + "36 [] \n", + "37 [] \n", + "38 [] \n", + "39 [] \n", + "40 [] \n", + "41 [] \n", + "42 [] \n", + "43 [] \n", + "44 [] \n", + "45 [] \n", + "46 [] \n", + "47 [] \n", + "48 [] \n", + "49 [] \n", + "50 [] \n", + "51 [] \n", + "52 [] \n", + "53 [] \n", + "\n", + " user_mentions \n", + "0 [] \n", + "1 [] \n", + "2 [] \n", + "3 [] \n", + "4 [] \n", + "5 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "6 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "7 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "8 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "9 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "10 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "11 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "12 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "13 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "14 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "15 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "16 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "17 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "18 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "19 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "20 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "21 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "22 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "23 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "24 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "25 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "26 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "27 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "28 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "29 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "30 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "31 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "32 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "33 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "34 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "35 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "36 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "37 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "38 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "39 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "40 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "41 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "42 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "43 [{'id_str': '7780132', 'name': 'Charles Smith'... \n", + "44 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "45 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "46 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "47 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "48 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "49 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "50 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "51 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "52 [{'id_str': '33836629', 'name': 'Andrej Karpat... \n", + "53 [{'id_str': '33836629', 'name': 'Andrej Karpat... " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "tweets_details = scraper.tweets_details([1770164518758633590], count=100, limit=100)\n", + "\n", + "df_tweets_details = (\n", + " pd.json_normalize(tweets_details,record_path=['data','threaded_conversation_with_injections_v2','instructions'])\n", + " ['entries']\n", + " .dropna()\n", + " .explode()\n", + " .apply(pd.Series)\n", + " ['content']\n", + " .apply(pd.Series)\n", + " ['items']\n", + " .dropna()\n", + " .explode()\n", + " .apply(pd.Series)\n", + " ['item']\n", + " .apply(pd.Series)\n", + " ['itemContent']\n", + " .apply(pd.Series)\n", + " .pipe(lambda df:df[df['__typename']!='TimelineTimelineCursor'])\n", + " ['tweet_results']\n", + " .apply(pd.Series)\n", + " ['result']\n", + " .apply(pd.Series)\n", + " .pipe(lambda df:df[df['__typename']!='TweetWithVisibilityResults'])\n", + " ['legacy']\n", + " .apply(pd.Series)\n", + " .pipe(lambda x: pd.concat([x, x['entities'].apply(pd.Series)],axis=1))\n", + " .assign(created_at=lambda x:pd.to_datetime(x['created_at']))\n", + " .sort_values('created_at', ascending=False)\n", + " .reset_index(drop=True)\n", + " .drop('entities',axis=1)\n", + " [[\n", + " 'created_at',\n", + " 'id_str',\n", + " 'user_id_str',\n", + " 'full_text',\n", + " 'favorite_count',\n", + " 'urls',\n", + " 'hashtags',\n", + " 'user_mentions',\n", + " ]]\n", + "\n", + ")\n", + "df_tweets_details\n" + ] + }, + { + "cell_type": "markdown", + "id": "c95a637b", + "metadata": {}, + "source": [ + "### Favoriters" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "cb7cd271", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Favoriters: 100%|███████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00, 1.22s/it]\n", + "/tmp/ipykernel_11828/1064589959.py:19: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " .assign(created_at=lambda x:pd.to_datetime(x['created_at']))\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rest_idcreated_atscreen_namelocationfollowers_countstatuses_countfavourites_countmedia_countstatuses_count
017741498705525800962024-03-30 19:01:21+00:00k_weissgerber4.00.018.00.00.0
117728301837974405132024-03-27 03:37:25+00:00kategerrald32.0173.0408.00.0173.0
217722867245778370562024-03-25 15:37:39+00:00DaringDott899440.01.056.00.01.0
317717857677293895682024-03-24 06:27:00+00:00ImeranceL331402.01.060.00.01.0
417717822468376780802024-03-24 06:13:01+00:00UsHarper778132.01.066.00.01.0
..............................
163150407902008-06-07 19:23:56+00:00ssarohaSan Jose, CA80.0599.01777.06.0599.0
164145783962008-04-29 01:12:13+00:00tim_waggonerPlanet Earth495.0332.0337.016.0332.0
16564526922007-05-30 20:13:53+00:00goodwintercrop338.0972.03874.048.0972.0
16651413812007-04-18 17:46:19+00:00hangelMedellin, Colombia493.09853.016483.0669.09853.0
167712832006-12-15 17:45:30+00:00mkampFrankfurt, Germany451.01181.010758.017.01181.0
\n", + "

168 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " rest_id created_at screen_name \\\n", + "0 1774149870552580096 2024-03-30 19:01:21+00:00 k_weissgerber \n", + "1 1772830183797440513 2024-03-27 03:37:25+00:00 kategerrald \n", + "2 1772286724577837056 2024-03-25 15:37:39+00:00 DaringDott89944 \n", + "3 1771785767729389568 2024-03-24 06:27:00+00:00 ImeranceL33140 \n", + "4 1771782246837678080 2024-03-24 06:13:01+00:00 UsHarper77813 \n", + ".. ... ... ... \n", + "163 15040790 2008-06-07 19:23:56+00:00 ssaroha \n", + "164 14578396 2008-04-29 01:12:13+00:00 tim_waggoner \n", + "165 6452692 2007-05-30 20:13:53+00:00 goodwintercrop \n", + "166 5141381 2007-04-18 17:46:19+00:00 hangel \n", + "167 71283 2006-12-15 17:45:30+00:00 mkamp \n", + "\n", + " location followers_count statuses_count favourites_count \\\n", + "0 4.0 0.0 18.0 \n", + "1 32.0 173.0 408.0 \n", + "2 0.0 1.0 56.0 \n", + "3 2.0 1.0 60.0 \n", + "4 2.0 1.0 66.0 \n", + ".. ... ... ... ... \n", + "163 San Jose, CA 80.0 599.0 1777.0 \n", + "164 Planet Earth 495.0 332.0 337.0 \n", + "165 338.0 972.0 3874.0 \n", + "166 Medellin, Colombia 493.0 9853.0 16483.0 \n", + "167 Frankfurt, Germany 451.0 1181.0 10758.0 \n", + "\n", + " media_count statuses_count \n", + "0 0.0 0.0 \n", + "1 0.0 173.0 \n", + "2 0.0 1.0 \n", + "3 0.0 1.0 \n", + "4 0.0 1.0 \n", + ".. ... ... \n", + "163 6.0 599.0 \n", + "164 16.0 332.0 \n", + "165 48.0 972.0 \n", + "166 669.0 9853.0 \n", + "167 17.0 1181.0 \n", + "\n", + "[168 rows x 9 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "favoriters = scraper.favoriters([1770164518758633590], count=100, limit=100)\n", + "\n", + "df_favoriters = (\n", + " pd.json_normalize(favoriters,record_path=['data','favoriters_timeline','timeline', 'instructions'])\n", + " ['entries']\n", + " .explode()\n", + " .apply(pd.Series)\n", + " ['content']\n", + " .apply(pd.Series)\n", + " .pipe(lambda df:df[df['__typename']!='TimelineTimelineCursor'])\n", + " ['itemContent']\n", + " .apply(pd.Series)\n", + " ['user_results']\n", + " .apply(pd.Series)\n", + " ['result']\n", + " .apply(pd.Series)\n", + " .pipe(lambda x: pd.concat([x[['rest_id']],x['legacy'].apply(pd.Series)],axis=1))\n", + " .dropna(subset='created_at')\n", + " .assign(created_at=lambda x:pd.to_datetime(x['created_at']))\n", + " .sort_values('created_at', ascending=False)\n", + " .reset_index(drop=True)\n", + " [[\n", + " 'rest_id',\n", + " 'created_at',\n", + " 'screen_name',\n", + " 'location',\n", + " 'followers_count',\n", + " 'statuses_count',\n", + " 'favourites_count',\n", + " 'media_count',\n", + " 'statuses_count'\n", + " ]]\n", + ")\n", + "df_favoriters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a45a8e55", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/examples/favoriters_and_retweeters.ipynb b/examples/favoriters_and_retweeters.ipynb deleted file mode 100644 index e111d69..0000000 --- a/examples/favoriters_and_retweeters.ipynb +++ /dev/null @@ -1,370 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "802043b7", - "metadata": {}, - "source": [ - "### polars/pandas examples\n", - "\n", - "> Note: structure of GraphQL response is not consistent, these examples may not work in all cases.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "54cd50bf", - "metadata": {}, - "outputs": [], - "source": [ - "# !pip uninstall twitter-api-client -y\n", - "# !pip install twitter-api-client --no-cache-dir" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "a3172006", - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "import re\n", - "from pathlib import Path\n", - "\n", - "import orjson\n", - "import pandas as pd\n", - "import polars as pl\n", - "\n", - "from twitter.util import find_key" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "4703bee3", - "metadata": {}, - "outputs": [], - "source": [ - "def to_int(tdf: pl.LazyFrame, *args) -> pl.LazyFrame:\n", - " return tdf.with_columns(pl.col(col).cast(pl.Int64, strict=False).alias(col) for col in args)\n", - "\n", - "\n", - "def to_dt(tdf: pl.LazyFrame, fmt: str, *args) -> pl.LazyFrame:\n", - " return tdf.with_columns(pl.col(col).str.strptime(pl.Datetime, fmt).alias(col) for col in args)\n", - "\n", - "\n", - "def get_data(path: Path, expr: str = '', **kwargs) -> dict:\n", - " D = {}\n", - " for p in path.rglob('*'):\n", - " if re.search(expr, p.name, **kwargs):\n", - " D.setdefault(p.stem.split('_')[-1], []).append(orjson.loads(p.read_bytes()))\n", - " return D" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "b0addc33", - "metadata": {}, - "outputs": [], - "source": [ - "PATH = Path('data/raw')\n", - "\n", - "# filter for users who favorited or retweeted a tweet\n", - "data = get_data(PATH, expr='Favoriters|Retweeters')" - ] - }, - { - "cell_type": "markdown", - "id": "09efb374", - "metadata": {}, - "source": [ - "### polars" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "e3a70d0e", - "metadata": {}, - "outputs": [], - "source": [ - "def get_user_details(data: dict, cols: list = None, sort: str = 'created_at') -> pl.LazyFrame:\n", - " numeric = [\n", - " 'fast_followers_count',\n", - " 'favourites_count',\n", - " 'followers_count',\n", - " 'friends_count',\n", - " 'listed_count',\n", - " 'media_count',\n", - " 'normal_followers_count',\n", - " 'statuses_count',\n", - " ]\n", - "\n", - " D = []\n", - " for u in find_key(data, 'user_results'):\n", - " x = u.get('result', {})\n", - " y = x.get('rest_id')\n", - " if z := x.get('legacy', {}):\n", - " D.append({'rest_id': y} | z)\n", - "\n", - " return (\n", - " pl.LazyFrame(D)\n", - " .unique(subset='rest_id')\n", - " .pipe(to_dt, '%a %b %d %H:%M:%S %z %Y', 'created_at')\n", - " .pipe(to_int, *numeric)\n", - " .sort(sort.strip(\"-\"), descending=\"-\" not in sort)\n", - " .select(cols)\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "91495fc2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "shape: (1855, 3)
created_atscreen_namefollowers_count
datetime[μs, +00:00]stri64
2007-03-31 01:16:45 +00:00"TheLos"1601
2008-03-18 19:04:59 +00:00"wickedjava"2986
2008-04-17 17:30:21 +00:00"needless_input...218
2008-06-27 08:58:13 +00:00"DebrisStorm"178
2008-07-26 21:58:07 +00:00"daka17"66
2008-09-03 23:27:25 +00:00"heyitsaaron"1230
2008-09-11 23:37:14 +00:00"marinamiss"771
2008-09-18 13:59:25 +00:00"shangrila79"229
2008-10-11 07:18:09 +00:00"fridayschild71...183
2008-10-27 19:40:43 +00:00"Jacelendrahz"188
2008-11-06 21:50:56 +00:00"yolo_pinyato"2944
2008-12-05 07:33:23 +00:00"El_Dandy40"205
2023-02-06 15:48:26 +00:00"CosmicGhidorah...11
2023-02-08 21:09:17 +00:00"backupfHell"14
2023-02-09 19:24:12 +00:00"KayFabulous80"144
2023-02-14 04:06:11 +00:00"HDBNGRClub"3
2023-02-16 18:38:48 +00:00"SladjaMilov14"1
2023-02-17 22:38:58 +00:00"c0pas27"53
2023-02-19 06:35:24 +00:00"B4NKSCLUB"13
2023-02-19 07:06:15 +00:00"Later_Hayter"54
2023-02-21 06:47:49 +00:00"hart_kanya"2
2023-02-26 09:43:04 +00:00"_Val_Nichole"62
2023-03-04 23:50:32 +00:00"Chublosophy"346
2023-03-05 20:56:30 +00:00"Erron_20"8
" - ], - "text/plain": [ - "shape: (1855, 3)\n", - "┌────────────────────────────┬────────────────┬─────────────────┐\n", - "│ created_at ┆ screen_name ┆ followers_count │\n", - "│ --- ┆ --- ┆ --- │\n", - "│ datetime[μs, +00:00] ┆ str ┆ i64 │\n", - "╞════════════════════════════╪════════════════╪═════════════════╡\n", - "│ 2007-03-31 01:16:45 +00:00 ┆ TheLos ┆ 1601 │\n", - "│ 2008-03-18 19:04:59 +00:00 ┆ wickedjava ┆ 2986 │\n", - "│ 2008-04-17 17:30:21 +00:00 ┆ needless_input ┆ 218 │\n", - "│ 2008-06-27 08:58:13 +00:00 ┆ DebrisStorm ┆ 178 │\n", - "│ … ┆ … ┆ … │\n", - "│ 2023-02-21 06:47:49 +00:00 ┆ hart_kanya ┆ 2 │\n", - "│ 2023-02-26 09:43:04 +00:00 ┆ _Val_Nichole ┆ 62 │\n", - "│ 2023-03-04 23:50:32 +00:00 ┆ Chublosophy ┆ 346 │\n", - "│ 2023-03-05 20:56:30 +00:00 ┆ Erron_20 ┆ 8 │\n", - "└────────────────────────────┴────────────────┴─────────────────┘" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "lf = get_user_details(\n", - " data,\n", - " cols=['created_at', 'screen_name', 'followers_count'],\n", - " sort='-created_at',\n", - ")\n", - "\n", - "lf.collect()" - ] - }, - { - "cell_type": "markdown", - "id": "03aa8cc0", - "metadata": {}, - "source": [ - "### pandas" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "4815e47f", - "metadata": {}, - "outputs": [], - "source": [ - "def get_user_details2(data: dict, cols: list = None, sort: str = 'created_at') -> pd.DataFrame:\n", - " D = []\n", - " for u in find_key(data, 'user_results'):\n", - " x = u.get('result', {})\n", - " y = x.get('rest_id')\n", - " if z := x.get('legacy', {}):\n", - " D.append({'rest_id': y} | z)\n", - " df = (\n", - " pd.DataFrame(D)\n", - " .drop_duplicates('rest_id')\n", - " .assign(created_at=lambda x: pd.to_datetime(x['created_at']))\n", - " .sort_values(sort.strip('-'), ascending='-' in sort)\n", - " .reset_index(drop=True)\n", - " )\n", - " n = [x for x in df.columns if 'count' in x]\n", - " df[n] = df[n].apply(pd.to_numeric, errors='coerce')\n", - " return df[cols] if cols else df" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "feb0251b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
created_atscreen_namefollowers_count
02007-03-31 01:16:45+00:00TheLos1601
12008-03-18 19:04:59+00:00wickedjava2986
22008-04-17 17:30:21+00:00needless_input218
32008-06-27 08:58:13+00:00DebrisStorm178
42008-07-26 21:58:07+00:00daka1766
............
18502023-02-19 07:06:15+00:00Later_Hayter54
18512023-02-21 06:47:49+00:00hart_kanya2
18522023-02-26 09:43:04+00:00_Val_Nichole62
18532023-03-04 23:50:32+00:00Chublosophy346
18542023-03-05 20:56:30+00:00Erron_208
\n", - "

1855 rows × 3 columns

\n", - "
" - ], - "text/plain": [ - " created_at screen_name followers_count\n", - "0 2007-03-31 01:16:45+00:00 TheLos 1601\n", - "1 2008-03-18 19:04:59+00:00 wickedjava 2986\n", - "2 2008-04-17 17:30:21+00:00 needless_input 218\n", - "3 2008-06-27 08:58:13+00:00 DebrisStorm 178\n", - "4 2008-07-26 21:58:07+00:00 daka17 66\n", - "... ... ... ...\n", - "1850 2023-02-19 07:06:15+00:00 Later_Hayter 54\n", - "1851 2023-02-21 06:47:49+00:00 hart_kanya 2\n", - "1852 2023-02-26 09:43:04+00:00 _Val_Nichole 62\n", - "1853 2023-03-04 23:50:32+00:00 Chublosophy 346\n", - "1854 2023-03-05 20:56:30+00:00 Erron_20 8\n", - "\n", - "[1855 rows x 3 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "PATH = Path('data/raw')\n", - "\n", - "data = get_data(PATH, expr='Favoriters|Retweeters') # filter for users who favorited or retweeted a tweet\n", - "\n", - "df = get_user_details2(\n", - " data,\n", - " cols = ['created_at','screen_name','followers_count'],\n", - " sort = '-created_at',\n", - ")\n", - "\n", - "df" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/search.ipynb b/examples/search.ipynb deleted file mode 100644 index 70808a0..0000000 --- a/examples/search.ipynb +++ /dev/null @@ -1,353 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "> Note: structure of GraphQL response is not consistent, these examples may not work in all cases." - ], - "metadata": { - "collapsed": false - }, - "id": "85ee96fb4899b369" - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "d815a387", - "metadata": {}, - "outputs": [], - "source": [ - "# !pip uninstall twitter-api-client -y\n", - "# !pip install twitter-api-client --no-cache-dir" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c1ecf8cb", - "metadata": {}, - "outputs": [], - "source": [ - "from twitter.search import Search\n", - "import pandas as pd\n", - "\n", - "email, username, password = ..., ..., ...\n", - "search = Search(email, username, password)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "98c65601", - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2023-05-18 21:20:12,075.075 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n", - "2023-05-18 21:20:12,656.656 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n", - "2023-05-18 21:20:13,452.452 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n", - "2023-05-18 21:20:13,899.899 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n", - "2023-05-18 21:20:14,539.539 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n", - "2023-05-18 21:20:14,938.938 DEBUG: [\u001B[32msuccess\u001B[0m] returned 101 search results for \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
created_atiduser_idfull_textlanguser_urltweet_urlgeocoordinatesplace
02023-05-19 04:07:19+00:001659410380026773509809177430602776576@msdarlin_ JENNIFER HUDSON first considered al...enhttps://twitter.com/i/user/809177430602776576https://twitter.com/i/status/1659410380026773509NoneNoneNone
12023-05-19 03:50:16+00:00165940608857842892921226048Jennifer Hudson - Believe https://t.co/vjqlw52MjOenhttps://twitter.com/i/user/21226048https://twitter.com/i/status/1659406088578428929NoneNoneNone
22023-05-19 03:03:12+00:001659394245835255808174826024If Fantasia and Jennifer Hudson do this verzuz...enhttps://twitter.com/i/user/174826024https://twitter.com/i/status/1659394245835255808NoneNoneNone
32023-05-19 02:44:21+00:0016593894992211886091143382733001039873jennifer hudson acabou de postar uma foto e no...pthttps://twitter.com/i/user/1143382733001039873https://twitter.com/i/status/1659389499221188609NoneNoneNone
42023-05-19 02:41:35+00:0016593888051185786891342931884150464512Jennifer Hudsoncyhttps://twitter.com/i/user/1342931884150464512https://twitter.com/i/status/1659388805118578689NoneNoneNone
.................................
962023-05-17 20:57:29+00:001658939820574400516534285941I can’t 🤣🤣 https://t.co/2tiIyHrMb7enhttps://twitter.com/i/user/534285941https://twitter.com/i/status/1658939820574400516NoneNoneNone
972023-05-17 19:46:21+00:001658921918890758148417935020Idk ask her https://t.co/md7BJf59C2enhttps://twitter.com/i/user/417935020https://twitter.com/i/status/1658921918890758148NoneNoneNone
982023-05-17 19:09:50+00:0016589127309910097952384861195My best hip hop female Dj @ChainzMsDj Dancing ...enhttps://twitter.com/i/user/2384861195https://twitter.com/i/status/1658912730991009795NoneNoneNone
992023-05-17 16:56:52+00:00165887926923232051415733529Kelly will sing with D. Smooth\\n\\nThe Complete...enhttps://twitter.com/i/user/15733529https://twitter.com/i/status/1658879269232320514NoneNoneNone
1002023-05-17 14:20:31+00:00165883991927865344417230018my dream collab? gimme _____ and ______.enhttps://twitter.com/i/user/17230018https://twitter.com/i/status/1658839919278653444NoneNoneNone
\n", - "

101 rows × 10 columns

\n", - "
" - ], - "text/plain": [ - " created_at id user_id \n", - "0 2023-05-19 04:07:19+00:00 1659410380026773509 809177430602776576 \\\n", - "1 2023-05-19 03:50:16+00:00 1659406088578428929 21226048 \n", - "2 2023-05-19 03:03:12+00:00 1659394245835255808 174826024 \n", - "3 2023-05-19 02:44:21+00:00 1659389499221188609 1143382733001039873 \n", - "4 2023-05-19 02:41:35+00:00 1659388805118578689 1342931884150464512 \n", - ".. ... ... ... \n", - "96 2023-05-17 20:57:29+00:00 1658939820574400516 534285941 \n", - "97 2023-05-17 19:46:21+00:00 1658921918890758148 417935020 \n", - "98 2023-05-17 19:09:50+00:00 1658912730991009795 2384861195 \n", - "99 2023-05-17 16:56:52+00:00 1658879269232320514 15733529 \n", - "100 2023-05-17 14:20:31+00:00 1658839919278653444 17230018 \n", - "\n", - " full_text lang \n", - "0 @msdarlin_ JENNIFER HUDSON first considered al... en \\\n", - "1 Jennifer Hudson - Believe https://t.co/vjqlw52MjO en \n", - "2 If Fantasia and Jennifer Hudson do this verzuz... en \n", - "3 jennifer hudson acabou de postar uma foto e no... pt \n", - "4 Jennifer Hudson cy \n", - ".. ... ... \n", - "96 I can’t 🤣🤣 https://t.co/2tiIyHrMb7 en \n", - "97 Idk ask her https://t.co/md7BJf59C2 en \n", - "98 My best hip hop female Dj @ChainzMsDj Dancing ... en \n", - "99 Kelly will sing with D. Smooth\\n\\nThe Complete... en \n", - "100 my dream collab? gimme _____ and ______. en \n", - "\n", - " user_url \n", - "0 https://twitter.com/i/user/809177430602776576 \\\n", - "1 https://twitter.com/i/user/21226048 \n", - "2 https://twitter.com/i/user/174826024 \n", - "3 https://twitter.com/i/user/1143382733001039873 \n", - "4 https://twitter.com/i/user/1342931884150464512 \n", - ".. ... \n", - "96 https://twitter.com/i/user/534285941 \n", - "97 https://twitter.com/i/user/417935020 \n", - "98 https://twitter.com/i/user/2384861195 \n", - "99 https://twitter.com/i/user/15733529 \n", - "100 https://twitter.com/i/user/17230018 \n", - "\n", - " tweet_url geo coordinates place \n", - "0 https://twitter.com/i/status/1659410380026773509 None None None \n", - "1 https://twitter.com/i/status/1659406088578428929 None None None \n", - "2 https://twitter.com/i/status/1659394245835255808 None None None \n", - "3 https://twitter.com/i/status/1659389499221188609 None None None \n", - "4 https://twitter.com/i/status/1659388805118578689 None None None \n", - ".. ... ... ... ... \n", - "96 https://twitter.com/i/status/1658939820574400516 None None None \n", - "97 https://twitter.com/i/status/1658921918890758148 None None None \n", - "98 https://twitter.com/i/status/1658912730991009795 None None None \n", - "99 https://twitter.com/i/status/1658879269232320514 None None None \n", - "100 https://twitter.com/i/status/1658839919278653444 None None None \n", - "\n", - "[101 rows x 10 columns]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "latest_results = search.run(\n", - " 'jennifer hudson since:2023-05-18',\n", - " limit=100,\n", - " latest=True, # get latest tweets only\n", - " retries=3,\n", - ")\n", - "\n", - "flat_results = [y for x in latest_results for y in x]\n", - "data = [r.get('globalObjects', {}).get('tweets', {})for r in flat_results]\n", - "base= 'https://twitter.com/i'\n", - "\n", - "df = (\n", - " pd.DataFrame({k:v for d in data for k,v in d.items()})\n", - " .T\n", - " .assign(created_at = lambda x: pd.to_datetime(x['created_at'], format='%a %b %d %H:%M:%S %z %Y'))\n", - " .assign(user_url = lambda x: f\"{base}/user/\"+x['user_id_str'])\n", - " .assign(tweet_url = lambda x: f\"{base}/status/\"+x['id_str'] )\n", - " .sort_values('created_at',ascending=False)\n", - " .drop_duplicates('id')\n", - " .reset_index(drop=True)\n", - ")\n", - "\n", - "# sample df with a few cols of interest\n", - "sample = df[['created_at','id','user_id','full_text', 'lang',\n", - " 'user_url', 'tweet_url', 'geo', 'coordinates', 'place']]\n", - "\n", - "sample" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/tweet_details.ipynb b/examples/tweet_details.ipynb deleted file mode 100644 index a89af60..0000000 --- a/examples/tweet_details.ipynb +++ /dev/null @@ -1,258 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "> Note: structure of GraphQL response is not consistent, these examples may not work in all cases." - ], - "metadata": { - "collapsed": false - }, - "id": "56e4acd2bbce8025" - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "2f27b8db", - "metadata": {}, - "outputs": [], - "source": [ - "# !pip install twitter-api-client==0.9.0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7885eecd", - "metadata": { - "scrolled": false - }, - "outputs": [], - "source": [ - "from twitter.scraper import Scraper\n", - "from twitter.util import *\n", - "import pandas as pd\n", - "\n", - "scraper = Scraper(session=init_session())\n", - "\n", - "# example tweet\n", - "tweet = scraper.tweets_details([1476988122986647553], limit=500, pbar=False)\n", - "\n", - "# unnest items and filter deleted tweets\n", - "items = [y for x in find_key(tweet, 'items') for y in x if not find_key(y, 'tombstone')]\n", - "\n", - "# index into relevant data points\n", - "tweet_results = [x.get('result') for x in find_key(items, 'tweet_results')]\n", - "\n", - "df = (\n", - " pd.json_normalize(tweet_results)\n", - " # remove duplicate replies if needed\n", - " .drop_duplicates('rest_id')\n", - " # clean up column names for illustrative purposes\n", - " .assign(date=lambda x: pd.to_datetime(x['legacy.created_at']).dt.strftime(\"%Y-%m-%d %H:%M:%S\"))\n", - " .assign(root_tweet=lambda x: x['legacy.conversation_id_str'])\n", - " .assign(text=lambda x: x['legacy.full_text'])\n", - " .assign(tweet=lambda x: x['rest_id'])\n", - " .assign(username=lambda x: x['core.user_results.result.legacy.screen_name'])\n", - " # sort by newest replies to root_tweet\n", - " .sort_values('date', ascending=False)\n", - " .reset_index(drop=True)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "f5ddad65", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
dateusernametweettextroot_tweet
02023-05-25 19:52:44dtweedys1661822628393254913@MKBHD 🤺🔸💜 #SubTweets ftw 🔸🌑🕊️1476988122986647553
12023-05-07 14:30:30BallSchlonger1655218553858015233@jacknft_4 @MKBHD sorry i cant1476988122986647553
22023-05-03 21:13:33jacknft_41653870434075410436@MKBHD Great! Dm me please1476988122986647553
32023-05-02 17:38:40tanmayshah281653453969476456450@MKBHD Thank you! Lets too!1476988122986647553
42023-04-02 14:08:55iStevenPlays1642529545415712768@MKBHD I got you.\\n\\n‘🔥__🔥’1476988122986647553
..................
1992021-12-31 18:46:59sbeams1476988269187448832@MKBHD https://t.co/1Fu5cKmPTn1476988122986647553
2002021-12-31 18:46:48woworeoo1476988222609793033@MKBHD Yeah Yeah no more tweets from you1476988122986647553
2012021-12-31 18:46:46disisjorj1476988217576640517@MKBHD @vassizzle1476988122986647553
2022021-12-31 18:46:46TechWizYT1476988215043276802@MKBHD Great advice!1476988122986647553
2032021-12-31 18:46:410x_flea1476988197146185730@MKBHD ❤️🤘🏼1476988122986647553
\n", - "

204 rows × 5 columns

\n", - "
" - ], - "text/plain": [ - " date username tweet \n", - "0 2023-05-25 19:52:44 dtweedys 1661822628393254913 \\\n", - "1 2023-05-07 14:30:30 BallSchlonger 1655218553858015233 \n", - "2 2023-05-03 21:13:33 jacknft_4 1653870434075410436 \n", - "3 2023-05-02 17:38:40 tanmayshah28 1653453969476456450 \n", - "4 2023-04-02 14:08:55 iStevenPlays 1642529545415712768 \n", - ".. ... ... ... \n", - "199 2021-12-31 18:46:59 sbeams 1476988269187448832 \n", - "200 2021-12-31 18:46:48 woworeoo 1476988222609793033 \n", - "201 2021-12-31 18:46:46 disisjorj 1476988217576640517 \n", - "202 2021-12-31 18:46:46 TechWizYT 1476988215043276802 \n", - "203 2021-12-31 18:46:41 0x_flea 1476988197146185730 \n", - "\n", - " text root_tweet \n", - "0 @MKBHD 🤺🔸💜 #SubTweets ftw 🔸🌑🕊️ 1476988122986647553 \n", - "1 @jacknft_4 @MKBHD sorry i cant 1476988122986647553 \n", - "2 @MKBHD Great! Dm me please 1476988122986647553 \n", - "3 @MKBHD Thank you! Lets too! 1476988122986647553 \n", - "4 @MKBHD I got you.\\n\\n‘🔥__🔥’ 1476988122986647553 \n", - ".. ... ... \n", - "199 @MKBHD https://t.co/1Fu5cKmPTn 1476988122986647553 \n", - "200 @MKBHD Yeah Yeah no more tweets from you 1476988122986647553 \n", - "201 @MKBHD @vassizzle 1476988122986647553 \n", - "202 @MKBHD Great advice! 1476988122986647553 \n", - "203 @MKBHD ❤️🤘🏼 1476988122986647553 \n", - "\n", - "[204 rows x 5 columns]" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[[\n", - " 'date',\n", - " 'username',\n", - " 'tweet',\n", - " 'text',\n", - " 'root_tweet',\n", - "]]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/tweets.ipynb b/examples/tweets.ipynb deleted file mode 100644 index 592eda0..0000000 --- a/examples/tweets.ipynb +++ /dev/null @@ -1,779 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "source": [ - "> Note: structure of GraphQL response is not consistent, these examples may not work in all cases." - ], - "metadata": { - "collapsed": false - }, - "id": "4739fa454bb20238" - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "f65b5a54", - "metadata": {}, - "outputs": [], - "source": [ - "import re\n", - "import time\n", - "import pandas as pd\n", - "\n", - "from twitter.scraper import Scraper\n", - "from twitter.util import find_key" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "1d7714a8", - "metadata": {}, - "outputs": [], - "source": [ - "email, username, password = ..., ..., ...\n", - "scraper = Scraper(email, username, password)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "tweets = scraper.tweets([33836629]).pop()\n", - "tweets_and_replies = scraper.tweets_and_replies([33836629]).pop()" - ], - "metadata": { - "collapsed": false - }, - "id": "d9c839bfb7d99004" - }, - { - "cell_type": "markdown", - "id": "a1339a2b", - "metadata": {}, - "source": [ - "### Find all unique urls in users tweets" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "f64a96d9", - "metadata": {}, - "outputs": [], - "source": [ - "unique_urls = set(find_key(tweets, 'expanded_url'))\n", - "unique_urls" - ] - }, - { - "cell_type": "markdown", - "id": "c8184cd5", - "metadata": {}, - "source": [ - "### Get summary of user tweet data" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "9e87995c", - "metadata": {}, - "outputs": [], - "source": [ - "tweet_data = []\n", - "for d in tweets + tweets_and_replies:\n", - " instructions = find_key(d, 'instructions').pop()\n", - " entries = find_key(instructions, 'entries').pop()\n", - " for entry in entries:\n", - " legacy = find_key(entry, 'legacy')\n", - " tweet_data.extend(legacy)\n", - "\n", - "user_key = 'can_dm' # filter using arbitrary key that only users have\n", - "expr = (x for x in tweet_data for k in x if k != user_key)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "224d5078", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_id_strid_strcreated_atfavorite_countfull_textquote_countreply_countretweet_count
03383662916372130693016494082023-03-18 22:03:08+00:0069.0@theamazingdrj Yes the integration right into ...1.06.04.0
1118149380535615897816372124486746849282023-03-18 22:00:40+00:009.0@karpathy How does it compare to using chatGPT...0.02.01.0
23383662916371885999670272002023-03-18 20:25:54+00:0013.0@ErikSchluntz Very likely0.01.01.0
3137484108129302118816371836524582830082023-03-18 20:06:14+00:006.0@karpathy Do you think this will work well for...0.01.00.0
43383662916371541113334947842023-03-18 18:08:51+00:005.0@aliapanahi logprobs kwarg https://t.co/4Uuh4V...0.01.01.0
...........................
2193383662916000315724422184972022-12-06 07:37:08+00:00248.0😂 stop Riley probably up there as someone who ...2.08.012.0
2201653543216000125709490585602022-12-06 06:21:38+00:001698.0To get a sense of how hyped LLMs are right now...18.047.096.0
2213383662915934179876874731522022-11-18 01:37:07+00:00206.0If previous neural nets are special-purpose co...5.02.016.0
2223383662915287927158103941122022-05-23 17:39:21+00:003044.0Something I've been doing for a few years that...42.0184.0115.0
2233383662915284536045157785602022-05-22 19:11:51+00:00914.0real-world data distribution is ~N(0,1)\\ngood ...11.047.065.0
\n", - "

224 rows × 8 columns

\n", - "
" - ], - "text/plain": [ - " user_id_str id_str created_at \\\n", - "0 33836629 1637213069301649408 2023-03-18 22:03:08+00:00 \n", - "1 1181493805356158978 1637212448674684928 2023-03-18 22:00:40+00:00 \n", - "2 33836629 1637188599967027200 2023-03-18 20:25:54+00:00 \n", - "3 1374841081293021188 1637183652458283008 2023-03-18 20:06:14+00:00 \n", - "4 33836629 1637154111333494784 2023-03-18 18:08:51+00:00 \n", - ".. ... ... ... \n", - "219 33836629 1600031572442218497 2022-12-06 07:37:08+00:00 \n", - "220 16535432 1600012570949058560 2022-12-06 06:21:38+00:00 \n", - "221 33836629 1593417987687473152 2022-11-18 01:37:07+00:00 \n", - "222 33836629 1528792715810394112 2022-05-23 17:39:21+00:00 \n", - "223 33836629 1528453604515778560 2022-05-22 19:11:51+00:00 \n", - "\n", - " favorite_count full_text \\\n", - "0 69.0 @theamazingdrj Yes the integration right into ... \n", - "1 9.0 @karpathy How does it compare to using chatGPT... \n", - "2 13.0 @ErikSchluntz Very likely \n", - "3 6.0 @karpathy Do you think this will work well for... \n", - "4 5.0 @aliapanahi logprobs kwarg https://t.co/4Uuh4V... \n", - ".. ... ... \n", - "219 248.0 😂 stop Riley probably up there as someone who ... \n", - "220 1698.0 To get a sense of how hyped LLMs are right now... \n", - "221 206.0 If previous neural nets are special-purpose co... \n", - "222 3044.0 Something I've been doing for a few years that... \n", - "223 914.0 real-world data distribution is ~N(0,1)\\ngood ... \n", - "\n", - " quote_count reply_count retweet_count \n", - "0 1.0 6.0 4.0 \n", - "1 0.0 2.0 1.0 \n", - "2 0.0 1.0 1.0 \n", - "3 0.0 1.0 0.0 \n", - "4 0.0 1.0 1.0 \n", - ".. ... ... ... \n", - "219 2.0 8.0 12.0 \n", - "220 18.0 47.0 96.0 \n", - "221 5.0 2.0 16.0 \n", - "222 42.0 184.0 115.0 \n", - "223 11.0 47.0 65.0 \n", - "\n", - "[224 rows x 8 columns]" - ] - }, - "execution_count": 60, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "## simple subset of relevant tweet fields \n", - "cols = [\n", - " 'user_id_str',\n", - " 'id_str',\n", - " 'created_at',\n", - " 'favorite_count',\n", - " 'full_text',\n", - " 'quote_count',\n", - " 'reply_count',\n", - " 'retweet_count',\n", - " # 'retweeted',\n", - " # 'conversation_id_str',\n", - " # 'favorited',\n", - " # 'is_quote_status',\n", - " # 'lang',\n", - " # 'quoted_status_id_str',\n", - "]\n", - "\n", - "df = pd.DataFrame(expr)[cols]\n", - "\n", - "df['created_at'] = pd.to_datetime(df['created_at'], format=\"%a %b %d %H:%M:%S %z %Y\")\n", - "\n", - "numeric = [\n", - " 'favorite_count',\n", - " 'quote_count',\n", - " 'reply_count',\n", - " 'retweet_count',\n", - "]\n", - "\n", - "df[numeric] = df[numeric].apply(pd.to_numeric, errors='coerce')\n", - "\n", - "## drop duplicates, sort by date\n", - "df = (df\n", - " .dropna(subset='id_str')\n", - " .drop_duplicates(subset='id_str')\n", - " .sort_values('created_at', ascending=False)\n", - " .reset_index(drop=True)\n", - " )\n", - "\n", - "# df.to_feather(f'{time.time_ns()}.feather')\n", - "# df.to_parquet(f'{time.time_ns()}.parquet')\n", - "df.to_csv(f'{time.time_ns()}.csv', index=False)\n", - "\n", - "df" - ] - }, - { - "cell_type": "markdown", - "id": "9103413b", - "metadata": {}, - "source": [ - "### search tweet text" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "id": "401712a3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_id_strid_strcreated_atfavorite_countfull_textquote_countreply_countretweet_count
95266770016371527157165834242023-03-18 18:03:18+00:0099.0@karpathy Sometimes I wish people could unders...2.01.05.0
143383662916371478224821657602023-03-18 17:43:52+00:00325.0If not careful, fine-tuning collapses entropy ...5.09.021.0
1778853393588607795216367866089168199682023-03-17 17:48:32+00:00411.0I finally installed github copilot (better lat...5.015.014.0
183383662916367657356273950732023-03-17 16:25:35+00:0022.0@BlancheMinerva @JosephJacks_ I didn’t work on...0.04.01.0
203383662916364592451841064972023-03-16 20:07:42+00:001254.0Less publicized but highly awesome aspect of G...10.038.0132.0
1443383662916208752637007994882023-02-01 20:02:31+00:0010.0@portisto @trending_repos sad. The way they co...0.01.02.0
1456562955216208504302542233602023-02-01 18:23:51+00:007.0@trending_repos @karpathy How can a main langu...0.04.00.0
1463383662916208117249528668162023-02-01 15:50:03+00:00245.0@trending_repos wow0.06.04.0
147116235912729486131416207491305566699522023-02-01 11:41:19+00:002541.0Trending repository of the month 🏆\\n \\nnanoGP...9.019.0320.0
1503383662916201875959795138572023-01-30 22:29:59+00:0015.0@hi_tysam It was very nice to read through top...0.01.02.0
151161544188367250229116201854087212564492023-01-30 22:21:17+00:0015.0@karpathy I'm honored and a bit stunned. Wow, ...0.03.00.0
1783383662916132504878387077122023-01-11 19:04:23+00:002257.0Didn't tweet nanoGPT yet (quietly getting it t...24.039.0303.0
1863383662916088951890783805442022-12-30 18:37:59+00:004356.0Nice read on reverse engineering of GitHub Cop...145.085.0555.0
1903383662916077915392580034572022-12-27 17:32:28+00:00556.0Context I realized I have to split up minGPT b...2.023.016.0
\n", - "
" - ], - "text/plain": [ - " user_id_str id_str created_at \\\n", - "9 52667700 1637152715716583424 2023-03-18 18:03:18+00:00 \n", - "14 33836629 1637147822482165760 2023-03-18 17:43:52+00:00 \n", - "17 788533935886077952 1636786608916819968 2023-03-17 17:48:32+00:00 \n", - "18 33836629 1636765735627395073 2023-03-17 16:25:35+00:00 \n", - "20 33836629 1636459245184106497 2023-03-16 20:07:42+00:00 \n", - "144 33836629 1620875263700799488 2023-02-01 20:02:31+00:00 \n", - "145 65629552 1620850430254223360 2023-02-01 18:23:51+00:00 \n", - "146 33836629 1620811724952866816 2023-02-01 15:50:03+00:00 \n", - "147 1162359127294861314 1620749130556669952 2023-02-01 11:41:19+00:00 \n", - "150 33836629 1620187595979513857 2023-01-30 22:29:59+00:00 \n", - "151 1615441883672502291 1620185408721256449 2023-01-30 22:21:17+00:00 \n", - "178 33836629 1613250487838707712 2023-01-11 19:04:23+00:00 \n", - "186 33836629 1608895189078380544 2022-12-30 18:37:59+00:00 \n", - "190 33836629 1607791539258003457 2022-12-27 17:32:28+00:00 \n", - "\n", - " favorite_count full_text \\\n", - "9 99.0 @karpathy Sometimes I wish people could unders... \n", - "14 325.0 If not careful, fine-tuning collapses entropy ... \n", - "17 411.0 I finally installed github copilot (better lat... \n", - "18 22.0 @BlancheMinerva @JosephJacks_ I didn’t work on... \n", - "20 1254.0 Less publicized but highly awesome aspect of G... \n", - "144 10.0 @portisto @trending_repos sad. The way they co... \n", - "145 7.0 @trending_repos @karpathy How can a main langu... \n", - "146 245.0 @trending_repos wow \n", - "147 2541.0 Trending repository of the month 🏆\\n \\nnanoGP... \n", - "150 15.0 @hi_tysam It was very nice to read through top... \n", - "151 15.0 @karpathy I'm honored and a bit stunned. Wow, ... \n", - "178 2257.0 Didn't tweet nanoGPT yet (quietly getting it t... \n", - "186 4356.0 Nice read on reverse engineering of GitHub Cop... \n", - "190 556.0 Context I realized I have to split up minGPT b... \n", - "\n", - " quote_count reply_count retweet_count \n", - "9 2.0 1.0 5.0 \n", - "14 5.0 9.0 21.0 \n", - "17 5.0 15.0 14.0 \n", - "18 0.0 4.0 1.0 \n", - "20 10.0 38.0 132.0 \n", - "144 0.0 1.0 2.0 \n", - "145 0.0 4.0 0.0 \n", - "146 0.0 6.0 4.0 \n", - "147 9.0 19.0 320.0 \n", - "150 0.0 1.0 2.0 \n", - "151 0.0 3.0 0.0 \n", - "178 24.0 39.0 303.0 \n", - "186 145.0 85.0 555.0 \n", - "190 2.0 23.0 16.0 " - ] - }, - "execution_count": 56, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df.full_text.str.contains('repos?i?|github', regex=True, flags=re.I)]" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "96ebc3fd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
user_id_strid_strcreated_atfavorite_countfull_textquote_countreply_countretweet_count
58161544188367250229116325775885299548192023-03-06 03:03:23+00:0091.0Speed up your LLM research exploration with a ...2.03.014.0
1433383662916215783540246773772023-02-03 18:36:21+00:005276.0The most dramatic optimization to nanoGPT so f...57.089.0353.0
147116235912729486131416207491305566699522023-02-01 11:41:19+00:002541.0Trending repository of the month 🏆\\n \\nnanoGP...9.019.0320.0
1723383662916153981176833884172023-01-17 17:18:18+00:0021166.0🔥 New (1h56m) video lecture: \"Let's build GPT:...331.0546.03321.0
1783383662916132504878387077122023-01-11 19:04:23+00:002257.0Didn't tweet nanoGPT yet (quietly getting it t...24.039.0303.0
\n", - "
" - ], - "text/plain": [ - " user_id_str id_str created_at \\\n", - "58 1615441883672502291 1632577588529954819 2023-03-06 03:03:23+00:00 \n", - "143 33836629 1621578354024677377 2023-02-03 18:36:21+00:00 \n", - "147 1162359127294861314 1620749130556669952 2023-02-01 11:41:19+00:00 \n", - "172 33836629 1615398117683388417 2023-01-17 17:18:18+00:00 \n", - "178 33836629 1613250487838707712 2023-01-11 19:04:23+00:00 \n", - "\n", - " favorite_count full_text \\\n", - "58 91.0 Speed up your LLM research exploration with a ... \n", - "143 5276.0 The most dramatic optimization to nanoGPT so f... \n", - "147 2541.0 Trending repository of the month 🏆\\n \\nnanoGP... \n", - "172 21166.0 🔥 New (1h56m) video lecture: \"Let's build GPT:... \n", - "178 2257.0 Didn't tweet nanoGPT yet (quietly getting it t... \n", - "\n", - " quote_count reply_count retweet_count \n", - "58 2.0 3.0 14.0 \n", - "143 57.0 89.0 353.0 \n", - "147 9.0 19.0 320.0 \n", - "172 331.0 546.0 3321.0 \n", - "178 24.0 39.0 303.0 " - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "flags = re.I\n", - "\n", - "(df\n", - " .query('full_text.str.contains(\"nanogpt\", regex=True, flags=@flags)', engine='python')\n", - " # .query(...)\n", - " # .query(...)\n", - " )" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}