Files
twitter-api-client/examples/tweets.ipynb
2024-01-12 17:00:01 -08:00

780 lines
28 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"source": [
"> Note: the structure of the GraphQL response is not consistent, so these examples may not work in all cases."
],
"metadata": {
"collapsed": false
},
"id": "4739fa454bb20238"
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f65b5a54",
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"import time\n",
"import pandas as pd\n",
"\n",
"from twitter.scraper import Scraper\n",
"from twitter.util import find_key"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "1d7714a8",
"metadata": {},
"outputs": [],
"source": [
"# each `...` is a placeholder value; presumably swapped for real login details before running\n",
"email = ...\n",
"username = ...\n",
"password = ...\n",
"scraper = Scraper(email, username, password)"
]
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"# query both endpoints with the same id and keep the item popped from each result\n",
"user_id = 33836629\n",
"tweets = scraper.tweets([user_id]).pop()\n",
"tweets_and_replies = scraper.tweets_and_replies([user_id]).pop()"
],
"metadata": {
"collapsed": false
},
"id": "d9c839bfb7d99004"
},
{
"cell_type": "markdown",
"id": "a1339a2b",
"metadata": {},
"source": [
"### Find all unique URLs in a user's tweets"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "f64a96d9",
"metadata": {},
"outputs": [],
"source": [
"# de-duplicate whatever find_key returns for 'expanded_url' (presumably every value stored under that key)\n",
"unique_urls = {*find_key(tweets, 'expanded_url')}\n",
"unique_urls"
]
},
{
"cell_type": "markdown",
"id": "c8184cd5",
"metadata": {},
"source": [
"### Get summary of user tweet data"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "9e87995c",
"metadata": {},
"outputs": [],
"source": [
"tweet_data = []\n",
"for d in tweets + tweets_and_replies:\n",
"    # the response structure is not consistent: skip pages missing the expected keys\n",
"    # instead of raising IndexError on an empty .pop()\n",
"    instructions = find_key(d, 'instructions')\n",
"    if not instructions:\n",
"        continue\n",
"    entries = find_key(instructions.pop(), 'entries')\n",
"    if not entries:\n",
"        continue\n",
"    for entry in entries.pop():\n",
"        tweet_data.extend(find_key(entry, 'legacy'))\n",
"\n",
"user_key = 'can_dm'  # filter using arbitrary key that only users have\n",
"# keep each dict that lacks the user-only key exactly once; built as a list (not a\n",
"# one-shot generator) so the cells that consume `expr` can be re-run\n",
"expr = [x for x in tweet_data if user_key not in x]"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "224d5078",
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id_str</th>\n",
" <th>id_str</th>\n",
" <th>created_at</th>\n",
" <th>favorite_count</th>\n",
" <th>full_text</th>\n",
" <th>quote_count</th>\n",
" <th>reply_count</th>\n",
" <th>retweet_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>33836629</td>\n",
" <td>1637213069301649408</td>\n",
" <td>2023-03-18 22:03:08+00:00</td>\n",
" <td>69.0</td>\n",
" <td>@theamazingdrj Yes the integration right into ...</td>\n",
" <td>1.0</td>\n",
" <td>6.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1181493805356158978</td>\n",
" <td>1637212448674684928</td>\n",
" <td>2023-03-18 22:00:40+00:00</td>\n",
" <td>9.0</td>\n",
" <td>@karpathy How does it compare to using chatGPT...</td>\n",
" <td>0.0</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>33836629</td>\n",
" <td>1637188599967027200</td>\n",
" <td>2023-03-18 20:25:54+00:00</td>\n",
" <td>13.0</td>\n",
" <td>@ErikSchluntz Very likely</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1374841081293021188</td>\n",
" <td>1637183652458283008</td>\n",
" <td>2023-03-18 20:06:14+00:00</td>\n",
" <td>6.0</td>\n",
" <td>@karpathy Do you think this will work well for...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>33836629</td>\n",
" <td>1637154111333494784</td>\n",
" <td>2023-03-18 18:08:51+00:00</td>\n",
" <td>5.0</td>\n",
" <td>@aliapanahi logprobs kwarg https://t.co/4Uuh4V...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>219</th>\n",
" <td>33836629</td>\n",
" <td>1600031572442218497</td>\n",
" <td>2022-12-06 07:37:08+00:00</td>\n",
" <td>248.0</td>\n",
" <td>😂 stop Riley probably up there as someone who ...</td>\n",
" <td>2.0</td>\n",
" <td>8.0</td>\n",
" <td>12.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>220</th>\n",
" <td>16535432</td>\n",
" <td>1600012570949058560</td>\n",
" <td>2022-12-06 06:21:38+00:00</td>\n",
" <td>1698.0</td>\n",
" <td>To get a sense of how hyped LLMs are right now...</td>\n",
" <td>18.0</td>\n",
" <td>47.0</td>\n",
" <td>96.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>221</th>\n",
" <td>33836629</td>\n",
" <td>1593417987687473152</td>\n",
" <td>2022-11-18 01:37:07+00:00</td>\n",
" <td>206.0</td>\n",
" <td>If previous neural nets are special-purpose co...</td>\n",
" <td>5.0</td>\n",
" <td>2.0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>222</th>\n",
" <td>33836629</td>\n",
" <td>1528792715810394112</td>\n",
" <td>2022-05-23 17:39:21+00:00</td>\n",
" <td>3044.0</td>\n",
" <td>Something I've been doing for a few years that...</td>\n",
" <td>42.0</td>\n",
" <td>184.0</td>\n",
" <td>115.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>223</th>\n",
" <td>33836629</td>\n",
" <td>1528453604515778560</td>\n",
" <td>2022-05-22 19:11:51+00:00</td>\n",
" <td>914.0</td>\n",
" <td>real-world data distribution is ~N(0,1)\\ngood ...</td>\n",
" <td>11.0</td>\n",
" <td>47.0</td>\n",
" <td>65.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>224 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" user_id_str id_str created_at \\\n",
"0 33836629 1637213069301649408 2023-03-18 22:03:08+00:00 \n",
"1 1181493805356158978 1637212448674684928 2023-03-18 22:00:40+00:00 \n",
"2 33836629 1637188599967027200 2023-03-18 20:25:54+00:00 \n",
"3 1374841081293021188 1637183652458283008 2023-03-18 20:06:14+00:00 \n",
"4 33836629 1637154111333494784 2023-03-18 18:08:51+00:00 \n",
".. ... ... ... \n",
"219 33836629 1600031572442218497 2022-12-06 07:37:08+00:00 \n",
"220 16535432 1600012570949058560 2022-12-06 06:21:38+00:00 \n",
"221 33836629 1593417987687473152 2022-11-18 01:37:07+00:00 \n",
"222 33836629 1528792715810394112 2022-05-23 17:39:21+00:00 \n",
"223 33836629 1528453604515778560 2022-05-22 19:11:51+00:00 \n",
"\n",
" favorite_count full_text \\\n",
"0 69.0 @theamazingdrj Yes the integration right into ... \n",
"1 9.0 @karpathy How does it compare to using chatGPT... \n",
"2 13.0 @ErikSchluntz Very likely \n",
"3 6.0 @karpathy Do you think this will work well for... \n",
"4 5.0 @aliapanahi logprobs kwarg https://t.co/4Uuh4V... \n",
".. ... ... \n",
"219 248.0 😂 stop Riley probably up there as someone who ... \n",
"220 1698.0 To get a sense of how hyped LLMs are right now... \n",
"221 206.0 If previous neural nets are special-purpose co... \n",
"222 3044.0 Something I've been doing for a few years that... \n",
"223 914.0 real-world data distribution is ~N(0,1)\\ngood ... \n",
"\n",
" quote_count reply_count retweet_count \n",
"0 1.0 6.0 4.0 \n",
"1 0.0 2.0 1.0 \n",
"2 0.0 1.0 1.0 \n",
"3 0.0 1.0 0.0 \n",
"4 0.0 1.0 1.0 \n",
".. ... ... ... \n",
"219 2.0 8.0 12.0 \n",
"220 18.0 47.0 96.0 \n",
"221 5.0 2.0 16.0 \n",
"222 42.0 184.0 115.0 \n",
"223 11.0 47.0 65.0 \n",
"\n",
"[224 rows x 8 columns]"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"## subset of tweet fields to keep\n",
"cols = [\n",
"    'user_id_str', 'id_str', 'created_at', 'favorite_count',\n",
"    'full_text', 'quote_count', 'reply_count', 'retweet_count',\n",
"    # left disabled: 'retweeted', 'conversation_id_str', 'favorited',\n",
"    # 'is_quote_status', 'lang', 'quoted_status_id_str'\n",
"]\n",
"\n",
"## count columns coerced to numbers (unparseable values become NaN)\n",
"numeric = ['favorite_count', 'quote_count', 'reply_count', 'retweet_count']\n",
"\n",
"df = pd.DataFrame(expr)[cols]\n",
"\n",
"## parse timestamps and counts in one step\n",
"df = df.assign(\n",
"    created_at=pd.to_datetime(df['created_at'], format=\"%a %b %d %H:%M:%S %z %Y\"),\n",
"    **{c: pd.to_numeric(df[c], errors='coerce') for c in numeric},\n",
")\n",
"\n",
"## drop rows without an id, drop duplicates, sort newest first\n",
"df = (\n",
"    df\n",
"    .dropna(subset='id_str')\n",
"    .drop_duplicates(subset='id_str')\n",
"    .sort_values('created_at', ascending=False)\n",
"    .reset_index(drop=True)\n",
")\n",
"\n",
"## save a snapshot named after the current time_ns() value (other formats left disabled)\n",
"# df.to_feather(f'{time.time_ns()}.feather')\n",
"# df.to_parquet(f'{time.time_ns()}.parquet')\n",
"df.to_csv(f'{time.time_ns()}.csv', index=False)\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"id": "9103413b",
"metadata": {},
"source": [
"### Search tweet text"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "401712a3",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id_str</th>\n",
" <th>id_str</th>\n",
" <th>created_at</th>\n",
" <th>favorite_count</th>\n",
" <th>full_text</th>\n",
" <th>quote_count</th>\n",
" <th>reply_count</th>\n",
" <th>retweet_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>52667700</td>\n",
" <td>1637152715716583424</td>\n",
" <td>2023-03-18 18:03:18+00:00</td>\n",
" <td>99.0</td>\n",
" <td>@karpathy Sometimes I wish people could unders...</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>33836629</td>\n",
" <td>1637147822482165760</td>\n",
" <td>2023-03-18 17:43:52+00:00</td>\n",
" <td>325.0</td>\n",
" <td>If not careful, fine-tuning collapses entropy ...</td>\n",
" <td>5.0</td>\n",
" <td>9.0</td>\n",
" <td>21.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>788533935886077952</td>\n",
" <td>1636786608916819968</td>\n",
" <td>2023-03-17 17:48:32+00:00</td>\n",
" <td>411.0</td>\n",
" <td>I finally installed github copilot (better lat...</td>\n",
" <td>5.0</td>\n",
" <td>15.0</td>\n",
" <td>14.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>33836629</td>\n",
" <td>1636765735627395073</td>\n",
" <td>2023-03-17 16:25:35+00:00</td>\n",
" <td>22.0</td>\n",
" <td>@BlancheMinerva @JosephJacks_ I didnt work on...</td>\n",
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>33836629</td>\n",
" <td>1636459245184106497</td>\n",
" <td>2023-03-16 20:07:42+00:00</td>\n",
" <td>1254.0</td>\n",
" <td>Less publicized but highly awesome aspect of G...</td>\n",
" <td>10.0</td>\n",
" <td>38.0</td>\n",
" <td>132.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>33836629</td>\n",
" <td>1620875263700799488</td>\n",
" <td>2023-02-01 20:02:31+00:00</td>\n",
" <td>10.0</td>\n",
" <td>@portisto @trending_repos sad. The way they co...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>65629552</td>\n",
" <td>1620850430254223360</td>\n",
" <td>2023-02-01 18:23:51+00:00</td>\n",
" <td>7.0</td>\n",
" <td>@trending_repos @karpathy How can a main langu...</td>\n",
" <td>0.0</td>\n",
" <td>4.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>33836629</td>\n",
" <td>1620811724952866816</td>\n",
" <td>2023-02-01 15:50:03+00:00</td>\n",
" <td>245.0</td>\n",
" <td>@trending_repos wow</td>\n",
" <td>0.0</td>\n",
" <td>6.0</td>\n",
" <td>4.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>1162359127294861314</td>\n",
" <td>1620749130556669952</td>\n",
" <td>2023-02-01 11:41:19+00:00</td>\n",
" <td>2541.0</td>\n",
" <td>Trending repository of the month 🏆\\n \\nnanoGP...</td>\n",
" <td>9.0</td>\n",
" <td>19.0</td>\n",
" <td>320.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>150</th>\n",
" <td>33836629</td>\n",
" <td>1620187595979513857</td>\n",
" <td>2023-01-30 22:29:59+00:00</td>\n",
" <td>15.0</td>\n",
" <td>@hi_tysam It was very nice to read through top...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>151</th>\n",
" <td>1615441883672502291</td>\n",
" <td>1620185408721256449</td>\n",
" <td>2023-01-30 22:21:17+00:00</td>\n",
" <td>15.0</td>\n",
" <td>@karpathy I'm honored and a bit stunned. Wow, ...</td>\n",
" <td>0.0</td>\n",
" <td>3.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>178</th>\n",
" <td>33836629</td>\n",
" <td>1613250487838707712</td>\n",
" <td>2023-01-11 19:04:23+00:00</td>\n",
" <td>2257.0</td>\n",
" <td>Didn't tweet nanoGPT yet (quietly getting it t...</td>\n",
" <td>24.0</td>\n",
" <td>39.0</td>\n",
" <td>303.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>186</th>\n",
" <td>33836629</td>\n",
" <td>1608895189078380544</td>\n",
" <td>2022-12-30 18:37:59+00:00</td>\n",
" <td>4356.0</td>\n",
" <td>Nice read on reverse engineering of GitHub Cop...</td>\n",
" <td>145.0</td>\n",
" <td>85.0</td>\n",
" <td>555.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>190</th>\n",
" <td>33836629</td>\n",
" <td>1607791539258003457</td>\n",
" <td>2022-12-27 17:32:28+00:00</td>\n",
" <td>556.0</td>\n",
" <td>Context I realized I have to split up minGPT b...</td>\n",
" <td>2.0</td>\n",
" <td>23.0</td>\n",
" <td>16.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id_str id_str created_at \\\n",
"9 52667700 1637152715716583424 2023-03-18 18:03:18+00:00 \n",
"14 33836629 1637147822482165760 2023-03-18 17:43:52+00:00 \n",
"17 788533935886077952 1636786608916819968 2023-03-17 17:48:32+00:00 \n",
"18 33836629 1636765735627395073 2023-03-17 16:25:35+00:00 \n",
"20 33836629 1636459245184106497 2023-03-16 20:07:42+00:00 \n",
"144 33836629 1620875263700799488 2023-02-01 20:02:31+00:00 \n",
"145 65629552 1620850430254223360 2023-02-01 18:23:51+00:00 \n",
"146 33836629 1620811724952866816 2023-02-01 15:50:03+00:00 \n",
"147 1162359127294861314 1620749130556669952 2023-02-01 11:41:19+00:00 \n",
"150 33836629 1620187595979513857 2023-01-30 22:29:59+00:00 \n",
"151 1615441883672502291 1620185408721256449 2023-01-30 22:21:17+00:00 \n",
"178 33836629 1613250487838707712 2023-01-11 19:04:23+00:00 \n",
"186 33836629 1608895189078380544 2022-12-30 18:37:59+00:00 \n",
"190 33836629 1607791539258003457 2022-12-27 17:32:28+00:00 \n",
"\n",
" favorite_count full_text \\\n",
"9 99.0 @karpathy Sometimes I wish people could unders... \n",
"14 325.0 If not careful, fine-tuning collapses entropy ... \n",
"17 411.0 I finally installed github copilot (better lat... \n",
"18 22.0 @BlancheMinerva @JosephJacks_ I didnt work on... \n",
"20 1254.0 Less publicized but highly awesome aspect of G... \n",
"144 10.0 @portisto @trending_repos sad. The way they co... \n",
"145 7.0 @trending_repos @karpathy How can a main langu... \n",
"146 245.0 @trending_repos wow \n",
"147 2541.0 Trending repository of the month 🏆\\n \\nnanoGP... \n",
"150 15.0 @hi_tysam It was very nice to read through top... \n",
"151 15.0 @karpathy I'm honored and a bit stunned. Wow, ... \n",
"178 2257.0 Didn't tweet nanoGPT yet (quietly getting it t... \n",
"186 4356.0 Nice read on reverse engineering of GitHub Cop... \n",
"190 556.0 Context I realized I have to split up minGPT b... \n",
"\n",
" quote_count reply_count retweet_count \n",
"9 2.0 1.0 5.0 \n",
"14 5.0 9.0 21.0 \n",
"17 5.0 15.0 14.0 \n",
"18 0.0 4.0 1.0 \n",
"20 10.0 38.0 132.0 \n",
"144 0.0 1.0 2.0 \n",
"145 0.0 4.0 0.0 \n",
"146 0.0 6.0 4.0 \n",
"147 9.0 19.0 320.0 \n",
"150 0.0 1.0 2.0 \n",
"151 0.0 3.0 0.0 \n",
"178 24.0 39.0 303.0 \n",
"186 145.0 85.0 555.0 \n",
"190 2.0 23.0 16.0 "
]
},
"execution_count": 56,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# case-insensitive regex search; na=False counts rows with missing text as non-matches\n",
"# so the mask stays strictly boolean and indexing cannot fail on NaN\n",
"df[df.full_text.str.contains('repos?i?|github', regex=True, flags=re.I, na=False)]"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "96ebc3fd",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>user_id_str</th>\n",
" <th>id_str</th>\n",
" <th>created_at</th>\n",
" <th>favorite_count</th>\n",
" <th>full_text</th>\n",
" <th>quote_count</th>\n",
" <th>reply_count</th>\n",
" <th>retweet_count</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>58</th>\n",
" <td>1615441883672502291</td>\n",
" <td>1632577588529954819</td>\n",
" <td>2023-03-06 03:03:23+00:00</td>\n",
" <td>91.0</td>\n",
" <td>Speed up your LLM research exploration with a ...</td>\n",
" <td>2.0</td>\n",
" <td>3.0</td>\n",
" <td>14.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>143</th>\n",
" <td>33836629</td>\n",
" <td>1621578354024677377</td>\n",
" <td>2023-02-03 18:36:21+00:00</td>\n",
" <td>5276.0</td>\n",
" <td>The most dramatic optimization to nanoGPT so f...</td>\n",
" <td>57.0</td>\n",
" <td>89.0</td>\n",
" <td>353.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>1162359127294861314</td>\n",
" <td>1620749130556669952</td>\n",
" <td>2023-02-01 11:41:19+00:00</td>\n",
" <td>2541.0</td>\n",
" <td>Trending repository of the month 🏆\\n \\nnanoGP...</td>\n",
" <td>9.0</td>\n",
" <td>19.0</td>\n",
" <td>320.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>172</th>\n",
" <td>33836629</td>\n",
" <td>1615398117683388417</td>\n",
" <td>2023-01-17 17:18:18+00:00</td>\n",
" <td>21166.0</td>\n",
" <td>🔥 New (1h56m) video lecture: \"Let's build GPT:...</td>\n",
" <td>331.0</td>\n",
" <td>546.0</td>\n",
" <td>3321.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>178</th>\n",
" <td>33836629</td>\n",
" <td>1613250487838707712</td>\n",
" <td>2023-01-11 19:04:23+00:00</td>\n",
" <td>2257.0</td>\n",
" <td>Didn't tweet nanoGPT yet (quietly getting it t...</td>\n",
" <td>24.0</td>\n",
" <td>39.0</td>\n",
" <td>303.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" user_id_str id_str created_at \\\n",
"58 1615441883672502291 1632577588529954819 2023-03-06 03:03:23+00:00 \n",
"143 33836629 1621578354024677377 2023-02-03 18:36:21+00:00 \n",
"147 1162359127294861314 1620749130556669952 2023-02-01 11:41:19+00:00 \n",
"172 33836629 1615398117683388417 2023-01-17 17:18:18+00:00 \n",
"178 33836629 1613250487838707712 2023-01-11 19:04:23+00:00 \n",
"\n",
" favorite_count full_text \\\n",
"58 91.0 Speed up your LLM research exploration with a ... \n",
"143 5276.0 The most dramatic optimization to nanoGPT so f... \n",
"147 2541.0 Trending repository of the month 🏆\\n \\nnanoGP... \n",
"172 21166.0 🔥 New (1h56m) video lecture: \"Let's build GPT:... \n",
"178 2257.0 Didn't tweet nanoGPT yet (quietly getting it t... \n",
"\n",
" quote_count reply_count retweet_count \n",
"58 2.0 3.0 14.0 \n",
"143 57.0 89.0 353.0 \n",
"147 9.0 19.0 320.0 \n",
"172 331.0 546.0 3321.0 \n",
"178 24.0 39.0 303.0 "
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"flags = re.I\n",
"\n",
"# same search expressed through DataFrame.query; na=False keeps rows with missing\n",
"# text from producing a NaN mask that query cannot index with\n",
"(df\n",
"    .query('full_text.str.contains(\"nanogpt\", regex=True, flags=@flags, na=False)', engine='python')\n",
"    # .query(...)\n",
"    # .query(...)\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.0"
}
},
"nbformat": 4,
"nbformat_minor": 5
}