mirror of
https://github.com/trevorhobenshield/twitter-api-client.git
synced 2025-12-19 18:12:11 -05:00
780 lines
28 KiB
Plaintext
780 lines
28 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"source": [
|
||
"> Note: the structure of the GraphQL response is not consistent, so these examples may not work in all cases."
|
||
],
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"id": "4739fa454bb20238"
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"id": "f65b5a54",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import re\n",
|
||
"import time\n",
|
||
"import pandas as pd\n",
|
||
"\n",
|
||
"from twitter.scraper import Scraper\n",
|
||
"from twitter.util import find_key"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 62,
|
||
"id": "1d7714a8",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Placeholder values (`...`); presumably to be replaced with real login details before running.\n",
|
||
"email = ...\n",
|
||
"username = ...\n",
|
||
"password = ...\n",
|
||
"scraper = Scraper(email, username, password)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"outputs": [],
|
||
"source": [
|
||
"# User id passed to both timeline requests below.\n",
|
||
"USER_ID = 33836629\n",
|
||
"\n",
|
||
"# Each call receives a list of ids; `.pop()` takes one item off the returned collection\n",
|
||
"# (presumably one result per requested id -- verify against the library).\n",
|
||
"tweets = scraper.tweets([USER_ID]).pop()\n",
|
||
"tweets_and_replies = scraper.tweets_and_replies([USER_ID]).pop()"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false
|
||
},
|
||
"id": "d9c839bfb7d99004"
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "a1339a2b",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Find all unique URLs in a user's tweets"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 63,
|
||
"id": "f64a96d9",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Collapse the values that `find_key` returns for 'expanded_url' into a set, dropping repeats.\n",
|
||
"unique_urls = {url for url in find_key(tweets, 'expanded_url')}\n",
|
||
"unique_urls"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "c8184cd5",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Get summary of user tweet data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"id": "9e87995c",
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Gather every 'legacy' value that `find_key` returns for each timeline entry.\n",
|
||
"tweet_data = []\n",
|
||
"for d in tweets + tweets_and_replies:\n",
|
||
"    instructions = find_key(d, 'instructions').pop()\n",
|
||
"    entries = find_key(instructions, 'entries').pop()\n",
|
||
"    for entry in entries:\n",
|
||
"        legacy = find_key(entry, 'legacy')\n",
|
||
"        tweet_data.extend(legacy)\n",
|
||
"\n",
|
||
"user_key = 'can_dm'  # filter using arbitrary key that only users have\n",
|
||
"\n",
|
||
"# Test membership on the record itself: looping over its keys would emit one copy\n",
|
||
"# of the record per key and would not exclude records that contain `user_key`.\n",
|
||
"# A list (not a one-shot generator) lets the DataFrame cell be re-run on its own.\n",
|
||
"expr = [x for x in tweet_data if user_key not in x]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"id": "224d5078",
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>user_id_str</th>\n",
|
||
" <th>id_str</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>favorite_count</th>\n",
|
||
" <th>full_text</th>\n",
|
||
" <th>quote_count</th>\n",
|
||
" <th>reply_count</th>\n",
|
||
" <th>retweet_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1637213069301649408</td>\n",
|
||
" <td>2023-03-18 22:03:08+00:00</td>\n",
|
||
" <td>69.0</td>\n",
|
||
" <td>@theamazingdrj Yes the integration right into ...</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>1181493805356158978</td>\n",
|
||
" <td>1637212448674684928</td>\n",
|
||
" <td>2023-03-18 22:00:40+00:00</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>@karpathy How does it compare to using chatGPT...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1637188599967027200</td>\n",
|
||
" <td>2023-03-18 20:25:54+00:00</td>\n",
|
||
" <td>13.0</td>\n",
|
||
" <td>@ErikSchluntz Very likely</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>1374841081293021188</td>\n",
|
||
" <td>1637183652458283008</td>\n",
|
||
" <td>2023-03-18 20:06:14+00:00</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>@karpathy Do you think this will work well for...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1637154111333494784</td>\n",
|
||
" <td>2023-03-18 18:08:51+00:00</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>@aliapanahi logprobs kwarg https://t.co/4Uuh4V...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>219</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1600031572442218497</td>\n",
|
||
" <td>2022-12-06 07:37:08+00:00</td>\n",
|
||
" <td>248.0</td>\n",
|
||
" <td>😂 stop Riley probably up there as someone who ...</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>8.0</td>\n",
|
||
" <td>12.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>220</th>\n",
|
||
" <td>16535432</td>\n",
|
||
" <td>1600012570949058560</td>\n",
|
||
" <td>2022-12-06 06:21:38+00:00</td>\n",
|
||
" <td>1698.0</td>\n",
|
||
" <td>To get a sense of how hyped LLMs are right now...</td>\n",
|
||
" <td>18.0</td>\n",
|
||
" <td>47.0</td>\n",
|
||
" <td>96.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>221</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1593417987687473152</td>\n",
|
||
" <td>2022-11-18 01:37:07+00:00</td>\n",
|
||
" <td>206.0</td>\n",
|
||
" <td>If previous neural nets are special-purpose co...</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>16.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>222</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1528792715810394112</td>\n",
|
||
" <td>2022-05-23 17:39:21+00:00</td>\n",
|
||
" <td>3044.0</td>\n",
|
||
" <td>Something I've been doing for a few years that...</td>\n",
|
||
" <td>42.0</td>\n",
|
||
" <td>184.0</td>\n",
|
||
" <td>115.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>223</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1528453604515778560</td>\n",
|
||
" <td>2022-05-22 19:11:51+00:00</td>\n",
|
||
" <td>914.0</td>\n",
|
||
" <td>real-world data distribution is ~N(0,1)\\ngood ...</td>\n",
|
||
" <td>11.0</td>\n",
|
||
" <td>47.0</td>\n",
|
||
" <td>65.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>224 rows × 8 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" user_id_str id_str created_at \\\n",
|
||
"0 33836629 1637213069301649408 2023-03-18 22:03:08+00:00 \n",
|
||
"1 1181493805356158978 1637212448674684928 2023-03-18 22:00:40+00:00 \n",
|
||
"2 33836629 1637188599967027200 2023-03-18 20:25:54+00:00 \n",
|
||
"3 1374841081293021188 1637183652458283008 2023-03-18 20:06:14+00:00 \n",
|
||
"4 33836629 1637154111333494784 2023-03-18 18:08:51+00:00 \n",
|
||
".. ... ... ... \n",
|
||
"219 33836629 1600031572442218497 2022-12-06 07:37:08+00:00 \n",
|
||
"220 16535432 1600012570949058560 2022-12-06 06:21:38+00:00 \n",
|
||
"221 33836629 1593417987687473152 2022-11-18 01:37:07+00:00 \n",
|
||
"222 33836629 1528792715810394112 2022-05-23 17:39:21+00:00 \n",
|
||
"223 33836629 1528453604515778560 2022-05-22 19:11:51+00:00 \n",
|
||
"\n",
|
||
" favorite_count full_text \\\n",
|
||
"0 69.0 @theamazingdrj Yes the integration right into ... \n",
|
||
"1 9.0 @karpathy How does it compare to using chatGPT... \n",
|
||
"2 13.0 @ErikSchluntz Very likely \n",
|
||
"3 6.0 @karpathy Do you think this will work well for... \n",
|
||
"4 5.0 @aliapanahi logprobs kwarg https://t.co/4Uuh4V... \n",
|
||
".. ... ... \n",
|
||
"219 248.0 😂 stop Riley probably up there as someone who ... \n",
|
||
"220 1698.0 To get a sense of how hyped LLMs are right now... \n",
|
||
"221 206.0 If previous neural nets are special-purpose co... \n",
|
||
"222 3044.0 Something I've been doing for a few years that... \n",
|
||
"223 914.0 real-world data distribution is ~N(0,1)\\ngood ... \n",
|
||
"\n",
|
||
" quote_count reply_count retweet_count \n",
|
||
"0 1.0 6.0 4.0 \n",
|
||
"1 0.0 2.0 1.0 \n",
|
||
"2 0.0 1.0 1.0 \n",
|
||
"3 0.0 1.0 0.0 \n",
|
||
"4 0.0 1.0 1.0 \n",
|
||
".. ... ... ... \n",
|
||
"219 2.0 8.0 12.0 \n",
|
||
"220 18.0 47.0 96.0 \n",
|
||
"221 5.0 2.0 16.0 \n",
|
||
"222 42.0 184.0 115.0 \n",
|
||
"223 11.0 47.0 65.0 \n",
|
||
"\n",
|
||
"[224 rows x 8 columns]"
|
||
]
|
||
},
|
||
"execution_count": 60,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Tweet fields kept in the summary; uncomment an entry below to include it.\n",
|
||
"cols = [\n",
|
||
"    'user_id_str',\n",
|
||
"    'id_str',\n",
|
||
"    'created_at',\n",
|
||
"    'favorite_count',\n",
|
||
"    'full_text',\n",
|
||
"    'quote_count',\n",
|
||
"    'reply_count',\n",
|
||
"    'retweet_count',\n",
|
||
"    # 'retweeted',\n",
|
||
"    # 'conversation_id_str',\n",
|
||
"    # 'favorited',\n",
|
||
"    # 'is_quote_status',\n",
|
||
"    # 'lang',\n",
|
||
"    # 'quoted_status_id_str',\n",
|
||
"]\n",
|
||
"\n",
|
||
"# Counter columns converted to numbers; values that cannot be parsed become NaN.\n",
|
||
"numeric = [\n",
|
||
"    'favorite_count',\n",
|
||
"    'quote_count',\n",
|
||
"    'reply_count',\n",
|
||
"    'retweet_count',\n",
|
||
"]\n",
|
||
"\n",
|
||
"df = pd.DataFrame(expr)[cols]\n",
|
||
"\n",
|
||
"df = (\n",
|
||
"    df\n",
|
||
"    # parse the timestamp strings and coerce the counters in a single step\n",
|
||
"    .assign(\n",
|
||
"        created_at=pd.to_datetime(df['created_at'], format=\"%a %b %d %H:%M:%S %z %Y\"),\n",
|
||
"        **{name: pd.to_numeric(df[name], errors='coerce') for name in numeric},\n",
|
||
"    )\n",
|
||
"    # drop rows without an id, keep one row per id, newest first\n",
|
||
"    .dropna(subset='id_str')\n",
|
||
"    .drop_duplicates(subset='id_str')\n",
|
||
"    .sort_values('created_at', ascending=False)\n",
|
||
"    .reset_index(drop=True)\n",
|
||
")\n",
|
||
"\n",
|
||
"# Alternative export formats; the file name is the current time in nanoseconds.\n",
|
||
"# df.to_feather(f'{time.time_ns()}.feather')\n",
|
||
"# df.to_parquet(f'{time.time_ns()}.parquet')\n",
|
||
"df.to_csv(f'{time.time_ns()}.csv', index=False)\n",
|
||
"\n",
|
||
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"id": "9103413b",
|
||
"metadata": {},
|
||
"source": [
|
||
"### Search tweet text"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"id": "401712a3",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>user_id_str</th>\n",
|
||
" <th>id_str</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>favorite_count</th>\n",
|
||
" <th>full_text</th>\n",
|
||
" <th>quote_count</th>\n",
|
||
" <th>reply_count</th>\n",
|
||
" <th>retweet_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>52667700</td>\n",
|
||
" <td>1637152715716583424</td>\n",
|
||
" <td>2023-03-18 18:03:18+00:00</td>\n",
|
||
" <td>99.0</td>\n",
|
||
" <td>@karpathy Sometimes I wish people could unders...</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>14</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1637147822482165760</td>\n",
|
||
" <td>2023-03-18 17:43:52+00:00</td>\n",
|
||
" <td>325.0</td>\n",
|
||
" <td>If not careful, fine-tuning collapses entropy ...</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>21.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>17</th>\n",
|
||
" <td>788533935886077952</td>\n",
|
||
" <td>1636786608916819968</td>\n",
|
||
" <td>2023-03-17 17:48:32+00:00</td>\n",
|
||
" <td>411.0</td>\n",
|
||
" <td>I finally installed github copilot (better lat...</td>\n",
|
||
" <td>5.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>18</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1636765735627395073</td>\n",
|
||
" <td>2023-03-17 16:25:35+00:00</td>\n",
|
||
" <td>22.0</td>\n",
|
||
" <td>@BlancheMinerva @JosephJacks_ I didn’t work on...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>20</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1636459245184106497</td>\n",
|
||
" <td>2023-03-16 20:07:42+00:00</td>\n",
|
||
" <td>1254.0</td>\n",
|
||
" <td>Less publicized but highly awesome aspect of G...</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>38.0</td>\n",
|
||
" <td>132.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>144</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1620875263700799488</td>\n",
|
||
" <td>2023-02-01 20:02:31+00:00</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>@portisto @trending_repos sad. The way they co...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>145</th>\n",
|
||
" <td>65629552</td>\n",
|
||
" <td>1620850430254223360</td>\n",
|
||
" <td>2023-02-01 18:23:51+00:00</td>\n",
|
||
" <td>7.0</td>\n",
|
||
" <td>@trending_repos @karpathy How can a main langu...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>146</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1620811724952866816</td>\n",
|
||
" <td>2023-02-01 15:50:03+00:00</td>\n",
|
||
" <td>245.0</td>\n",
|
||
" <td>@trending_repos wow</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>6.0</td>\n",
|
||
" <td>4.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>147</th>\n",
|
||
" <td>1162359127294861314</td>\n",
|
||
" <td>1620749130556669952</td>\n",
|
||
" <td>2023-02-01 11:41:19+00:00</td>\n",
|
||
" <td>2541.0</td>\n",
|
||
" <td>Trending repository of the month 🏆\\n \\nnanoGP...</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>19.0</td>\n",
|
||
" <td>320.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>150</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1620187595979513857</td>\n",
|
||
" <td>2023-01-30 22:29:59+00:00</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>@hi_tysam It was very nice to read through top...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>1.0</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>151</th>\n",
|
||
" <td>1615441883672502291</td>\n",
|
||
" <td>1620185408721256449</td>\n",
|
||
" <td>2023-01-30 22:21:17+00:00</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>@karpathy I'm honored and a bit stunned. Wow, ...</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>0.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>178</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1613250487838707712</td>\n",
|
||
" <td>2023-01-11 19:04:23+00:00</td>\n",
|
||
" <td>2257.0</td>\n",
|
||
" <td>Didn't tweet nanoGPT yet (quietly getting it t...</td>\n",
|
||
" <td>24.0</td>\n",
|
||
" <td>39.0</td>\n",
|
||
" <td>303.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>186</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1608895189078380544</td>\n",
|
||
" <td>2022-12-30 18:37:59+00:00</td>\n",
|
||
" <td>4356.0</td>\n",
|
||
" <td>Nice read on reverse engineering of GitHub Cop...</td>\n",
|
||
" <td>145.0</td>\n",
|
||
" <td>85.0</td>\n",
|
||
" <td>555.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>190</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1607791539258003457</td>\n",
|
||
" <td>2022-12-27 17:32:28+00:00</td>\n",
|
||
" <td>556.0</td>\n",
|
||
" <td>Context I realized I have to split up minGPT b...</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>23.0</td>\n",
|
||
" <td>16.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" user_id_str id_str created_at \\\n",
|
||
"9 52667700 1637152715716583424 2023-03-18 18:03:18+00:00 \n",
|
||
"14 33836629 1637147822482165760 2023-03-18 17:43:52+00:00 \n",
|
||
"17 788533935886077952 1636786608916819968 2023-03-17 17:48:32+00:00 \n",
|
||
"18 33836629 1636765735627395073 2023-03-17 16:25:35+00:00 \n",
|
||
"20 33836629 1636459245184106497 2023-03-16 20:07:42+00:00 \n",
|
||
"144 33836629 1620875263700799488 2023-02-01 20:02:31+00:00 \n",
|
||
"145 65629552 1620850430254223360 2023-02-01 18:23:51+00:00 \n",
|
||
"146 33836629 1620811724952866816 2023-02-01 15:50:03+00:00 \n",
|
||
"147 1162359127294861314 1620749130556669952 2023-02-01 11:41:19+00:00 \n",
|
||
"150 33836629 1620187595979513857 2023-01-30 22:29:59+00:00 \n",
|
||
"151 1615441883672502291 1620185408721256449 2023-01-30 22:21:17+00:00 \n",
|
||
"178 33836629 1613250487838707712 2023-01-11 19:04:23+00:00 \n",
|
||
"186 33836629 1608895189078380544 2022-12-30 18:37:59+00:00 \n",
|
||
"190 33836629 1607791539258003457 2022-12-27 17:32:28+00:00 \n",
|
||
"\n",
|
||
" favorite_count full_text \\\n",
|
||
"9 99.0 @karpathy Sometimes I wish people could unders... \n",
|
||
"14 325.0 If not careful, fine-tuning collapses entropy ... \n",
|
||
"17 411.0 I finally installed github copilot (better lat... \n",
|
||
"18 22.0 @BlancheMinerva @JosephJacks_ I didn’t work on... \n",
|
||
"20 1254.0 Less publicized but highly awesome aspect of G... \n",
|
||
"144 10.0 @portisto @trending_repos sad. The way they co... \n",
|
||
"145 7.0 @trending_repos @karpathy How can a main langu... \n",
|
||
"146 245.0 @trending_repos wow \n",
|
||
"147 2541.0 Trending repository of the month 🏆\\n \\nnanoGP... \n",
|
||
"150 15.0 @hi_tysam It was very nice to read through top... \n",
|
||
"151 15.0 @karpathy I'm honored and a bit stunned. Wow, ... \n",
|
||
"178 2257.0 Didn't tweet nanoGPT yet (quietly getting it t... \n",
|
||
"186 4356.0 Nice read on reverse engineering of GitHub Cop... \n",
|
||
"190 556.0 Context I realized I have to split up minGPT b... \n",
|
||
"\n",
|
||
" quote_count reply_count retweet_count \n",
|
||
"9 2.0 1.0 5.0 \n",
|
||
"14 5.0 9.0 21.0 \n",
|
||
"17 5.0 15.0 14.0 \n",
|
||
"18 0.0 4.0 1.0 \n",
|
||
"20 10.0 38.0 132.0 \n",
|
||
"144 0.0 1.0 2.0 \n",
|
||
"145 0.0 4.0 0.0 \n",
|
||
"146 0.0 6.0 4.0 \n",
|
||
"147 9.0 19.0 320.0 \n",
|
||
"150 0.0 1.0 2.0 \n",
|
||
"151 0.0 3.0 0.0 \n",
|
||
"178 24.0 39.0 303.0 \n",
|
||
"186 145.0 85.0 555.0 \n",
|
||
"190 2.0 23.0 16.0 "
|
||
]
|
||
},
|
||
"execution_count": 56,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Rows whose text matches the pattern, ignoring case.\n",
|
||
"df.loc[df['full_text'].str.contains('repos?i?|github', regex=True, flags=re.I)]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 55,
|
||
"id": "96ebc3fd",
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>user_id_str</th>\n",
|
||
" <th>id_str</th>\n",
|
||
" <th>created_at</th>\n",
|
||
" <th>favorite_count</th>\n",
|
||
" <th>full_text</th>\n",
|
||
" <th>quote_count</th>\n",
|
||
" <th>reply_count</th>\n",
|
||
" <th>retweet_count</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>58</th>\n",
|
||
" <td>1615441883672502291</td>\n",
|
||
" <td>1632577588529954819</td>\n",
|
||
" <td>2023-03-06 03:03:23+00:00</td>\n",
|
||
" <td>91.0</td>\n",
|
||
" <td>Speed up your LLM research exploration with a ...</td>\n",
|
||
" <td>2.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>14.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>143</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1621578354024677377</td>\n",
|
||
" <td>2023-02-03 18:36:21+00:00</td>\n",
|
||
" <td>5276.0</td>\n",
|
||
" <td>The most dramatic optimization to nanoGPT so f...</td>\n",
|
||
" <td>57.0</td>\n",
|
||
" <td>89.0</td>\n",
|
||
" <td>353.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>147</th>\n",
|
||
" <td>1162359127294861314</td>\n",
|
||
" <td>1620749130556669952</td>\n",
|
||
" <td>2023-02-01 11:41:19+00:00</td>\n",
|
||
" <td>2541.0</td>\n",
|
||
" <td>Trending repository of the month 🏆\\n \\nnanoGP...</td>\n",
|
||
" <td>9.0</td>\n",
|
||
" <td>19.0</td>\n",
|
||
" <td>320.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>172</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1615398117683388417</td>\n",
|
||
" <td>2023-01-17 17:18:18+00:00</td>\n",
|
||
" <td>21166.0</td>\n",
|
||
" <td>🔥 New (1h56m) video lecture: \"Let's build GPT:...</td>\n",
|
||
" <td>331.0</td>\n",
|
||
" <td>546.0</td>\n",
|
||
" <td>3321.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>178</th>\n",
|
||
" <td>33836629</td>\n",
|
||
" <td>1613250487838707712</td>\n",
|
||
" <td>2023-01-11 19:04:23+00:00</td>\n",
|
||
" <td>2257.0</td>\n",
|
||
" <td>Didn't tweet nanoGPT yet (quietly getting it t...</td>\n",
|
||
" <td>24.0</td>\n",
|
||
" <td>39.0</td>\n",
|
||
" <td>303.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" user_id_str id_str created_at \\\n",
|
||
"58 1615441883672502291 1632577588529954819 2023-03-06 03:03:23+00:00 \n",
|
||
"143 33836629 1621578354024677377 2023-02-03 18:36:21+00:00 \n",
|
||
"147 1162359127294861314 1620749130556669952 2023-02-01 11:41:19+00:00 \n",
|
||
"172 33836629 1615398117683388417 2023-01-17 17:18:18+00:00 \n",
|
||
"178 33836629 1613250487838707712 2023-01-11 19:04:23+00:00 \n",
|
||
"\n",
|
||
" favorite_count full_text \\\n",
|
||
"58 91.0 Speed up your LLM research exploration with a ... \n",
|
||
"143 5276.0 The most dramatic optimization to nanoGPT so f... \n",
|
||
"147 2541.0 Trending repository of the month 🏆\\n \\nnanoGP... \n",
|
||
"172 21166.0 🔥 New (1h56m) video lecture: \"Let's build GPT:... \n",
|
||
"178 2257.0 Didn't tweet nanoGPT yet (quietly getting it t... \n",
|
||
"\n",
|
||
" quote_count reply_count retweet_count \n",
|
||
"58 2.0 3.0 14.0 \n",
|
||
"143 57.0 89.0 353.0 \n",
|
||
"147 9.0 19.0 320.0 \n",
|
||
"172 331.0 546.0 3321.0 \n",
|
||
"178 24.0 39.0 303.0 "
|
||
]
|
||
},
|
||
"execution_count": 55,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# `@flags` inside the query string refers to this variable; it makes the match case-insensitive.\n",
|
||
"flags = re.IGNORECASE\n",
|
||
"\n",
|
||
"(\n",
|
||
"    df\n",
|
||
"    .query('full_text.str.contains(\"nanogpt\", regex=True, flags=@flags)', engine='python')\n",
|
||
"    # .query(...)\n",
|
||
"    # .query(...)\n",
|
||
")"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.11.0"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|