mirror of
https://github.com/trevorhobenshield/twitter-api-client.git
synced 2025-12-19 09:58:30 -05:00
add example notebook
This commit is contained in:
@@ -1,401 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"> Note: the structure of the GraphQL response is not consistent, so these examples may not work in all cases."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "fce8131509380867"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "6c99787b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"import pandas as pd\n",
|
||||
"from twitter.scraper import Scraper\n",
|
||||
"from twitter.util import *"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "dd3b7a57",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scraper = Scraper(session=init_session())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "17a91f72",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### get chat log"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "597b3a0f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"room_id = '1eaJbrAPnBVJX'\n",
|
||||
"spaces = scraper.spaces(rooms=[room_id], audio=0, chat=1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "faaa76b1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>room</th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
" <th>twitter_id</th>\n",
|
||||
" <th>username</th>\n",
|
||||
" <th>body</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:32:19</td>\n",
|
||||
" <td>1106321031566893057</td>\n",
|
||||
" <td>jimfarley98</td>\n",
|
||||
" <td></td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:32:55</td>\n",
|
||||
" <td>1106321031566893057</td>\n",
|
||||
" <td>jimfarley98</td>\n",
|
||||
" <td></td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:33:01</td>\n",
|
||||
" <td>1106321031566893057</td>\n",
|
||||
" <td>jimfarley98</td>\n",
|
||||
" <td></td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:33:13</td>\n",
|
||||
" <td>1106321031566893057</td>\n",
|
||||
" <td>jimfarley98</td>\n",
|
||||
" <td>Hi Alan, are you there?</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:33:17</td>\n",
|
||||
" <td>44196397</td>\n",
|
||||
" <td>elonmusk</td>\n",
|
||||
" <td>I am.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>251</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 22:00:00</td>\n",
|
||||
" <td>1106321031566893057</td>\n",
|
||||
" <td>jimfarley98</td>\n",
|
||||
" <td>Yeah, it's a great platform.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>252</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 22:00:03</td>\n",
|
||||
" <td>44196397</td>\n",
|
||||
" <td>elonmusk</td>\n",
|
||||
" <td>Alright, cool. Sounds good. Thank you.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>253</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 22:00:03</td>\n",
|
||||
" <td>1106321031566893057</td>\n",
|
||||
" <td>jimfarley98</td>\n",
|
||||
" <td>OK. Thank you.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>254</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 22:00:04</td>\n",
|
||||
" <td>44196397</td>\n",
|
||||
" <td>elonmusk</td>\n",
|
||||
" <td>Right.</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>255</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 22:00:05</td>\n",
|
||||
" <td>1106321031566893057</td>\n",
|
||||
" <td>jimfarley98</td>\n",
|
||||
" <td>Bye.</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>256 rows × 5 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" room timestamp twitter_id username \n",
|
||||
"0 1eaJbrAPnBVJX 2023-05-25 21:32:19 1106321031566893057 jimfarley98 \\\n",
|
||||
"1 1eaJbrAPnBVJX 2023-05-25 21:32:55 1106321031566893057 jimfarley98 \n",
|
||||
"2 1eaJbrAPnBVJX 2023-05-25 21:33:01 1106321031566893057 jimfarley98 \n",
|
||||
"3 1eaJbrAPnBVJX 2023-05-25 21:33:13 1106321031566893057 jimfarley98 \n",
|
||||
"4 1eaJbrAPnBVJX 2023-05-25 21:33:17 44196397 elonmusk \n",
|
||||
".. ... ... ... ... \n",
|
||||
"251 1eaJbrAPnBVJX 2023-05-25 22:00:00 1106321031566893057 jimfarley98 \n",
|
||||
"252 1eaJbrAPnBVJX 2023-05-25 22:00:03 44196397 elonmusk \n",
|
||||
"253 1eaJbrAPnBVJX 2023-05-25 22:00:03 1106321031566893057 jimfarley98 \n",
|
||||
"254 1eaJbrAPnBVJX 2023-05-25 22:00:04 44196397 elonmusk \n",
|
||||
"255 1eaJbrAPnBVJX 2023-05-25 22:00:05 1106321031566893057 jimfarley98 \n",
|
||||
"\n",
|
||||
" body \n",
|
||||
"0 \n",
|
||||
"1 \n",
|
||||
"2 \n",
|
||||
"3 Hi Alan, are you there? \n",
|
||||
"4 I am. \n",
|
||||
".. ... \n",
|
||||
"251 Yeah, it's a great platform. \n",
|
||||
"252 Alright, cool. Sounds good. Thank you. \n",
|
||||
"253 OK. Thank you. \n",
|
||||
"254 Right. \n",
|
||||
"255 Bye. \n",
|
||||
"\n",
|
||||
"[256 rows x 5 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"chat = pd.json_normalize(spaces[0]['chat'])\n",
|
||||
"chat = chat[chat['payload.body.final'] == True]\n",
|
||||
"dates = ['payload.body.timestamp']\n",
|
||||
"chat[dates] = chat[dates].apply(pd.to_datetime, unit='ms').apply(lambda x: x.dt.strftime(\"%Y-%m-%d %H:%M:%S %z\"))\n",
|
||||
"chat = chat.sort_values('payload.body.timestamp').reset_index(drop=True)\n",
|
||||
"chat = chat[[\n",
|
||||
" 'payload.room',\n",
|
||||
" 'payload.body.timestamp',\n",
|
||||
" 'payload.sender.twitter_id',\n",
|
||||
" 'payload.body.username',\n",
|
||||
" 'payload.body.body',\n",
|
||||
"]]\n",
|
||||
"chat.columns = chat.columns.str.replace('(payload|body|sender).','',regex=True).str.replace('.','_')\n",
|
||||
"# chat.to_csv(f'{room_id}.csv',index=False)\n",
|
||||
"chat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "118919dd",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### query chat"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "3010c52a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>room</th>\n",
|
||||
" <th>timestamp</th>\n",
|
||||
" <th>twitter_id</th>\n",
|
||||
" <th>username</th>\n",
|
||||
" <th>body</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>22</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:35:05</td>\n",
|
||||
" <td>44196397</td>\n",
|
||||
" <td>elonmusk</td>\n",
|
||||
" <td>Yeah, well, well, it's certainly super excitin...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>30</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:36:00</td>\n",
|
||||
" <td>44196397</td>\n",
|
||||
" <td>elonmusk</td>\n",
|
||||
" <td>API access so like you know like a Ford vehicl...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>33</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:36:23</td>\n",
|
||||
" <td>44196397</td>\n",
|
||||
" <td>elonmusk</td>\n",
|
||||
" <td>And we're very, very much appreciative of of F...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>47</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:38:29</td>\n",
|
||||
" <td>44196397</td>\n",
|
||||
" <td>elonmusk</td>\n",
|
||||
" <td>But but I think it is the the the teams have d...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>191</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:53:52</td>\n",
|
||||
" <td>44196397</td>\n",
|
||||
" <td>elonmusk</td>\n",
|
||||
" <td>Yeah, I agree with that. We should probably no...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>211</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:56:13</td>\n",
|
||||
" <td>44196397</td>\n",
|
||||
" <td>elonmusk</td>\n",
|
||||
" <td>So, you know, it is certainly the Tesla intent...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>248</th>\n",
|
||||
" <td>1eaJbrAPnBVJX</td>\n",
|
||||
" <td>2023-05-25 21:59:45</td>\n",
|
||||
" <td>44196397</td>\n",
|
||||
" <td>elonmusk</td>\n",
|
||||
" <td>Likewise there, it's an honor to be working wi...</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" room timestamp twitter_id username \n",
|
||||
"22 1eaJbrAPnBVJX 2023-05-25 21:35:05 44196397 elonmusk \\\n",
|
||||
"30 1eaJbrAPnBVJX 2023-05-25 21:36:00 44196397 elonmusk \n",
|
||||
"33 1eaJbrAPnBVJX 2023-05-25 21:36:23 44196397 elonmusk \n",
|
||||
"47 1eaJbrAPnBVJX 2023-05-25 21:38:29 44196397 elonmusk \n",
|
||||
"191 1eaJbrAPnBVJX 2023-05-25 21:53:52 44196397 elonmusk \n",
|
||||
"211 1eaJbrAPnBVJX 2023-05-25 21:56:13 44196397 elonmusk \n",
|
||||
"248 1eaJbrAPnBVJX 2023-05-25 21:59:45 44196397 elonmusk \n",
|
||||
"\n",
|
||||
" body \n",
|
||||
"22 Yeah, well, well, it's certainly super excitin... \n",
|
||||
"30 API access so like you know like a Ford vehicl... \n",
|
||||
"33 And we're very, very much appreciative of of F... \n",
|
||||
"47 But but I think it is the the the teams have d... \n",
|
||||
"191 Yeah, I agree with that. We should probably no... \n",
|
||||
"211 So, you know, it is certainly the Tesla intent... \n",
|
||||
"248 Likewise there, it's an honor to be working wi... "
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"flags = re.I\n",
|
||||
"(chat\n",
|
||||
" .query('body.str.contains(\"\\sford\",regex=True,flags=@flags)')\n",
|
||||
" .query('username.str.contains(\"elonmusk\",regex=True,flags=@flags)')\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"## alternatively\n",
|
||||
"# chat[\n",
|
||||
"# chat.body.str.contains('\\sford',regex=True,flags=re.I)\n",
|
||||
"# &\n",
|
||||
"# chat.username.str.contains('elonmusk',regex=True,flags=re.I)\n",
|
||||
"# ]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
1584
examples/example.ipynb
Normal file
1584
examples/example.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,370 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "802043b7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### polars/pandas examples\n",
|
||||
"\n",
|
||||
"> Note: the structure of the GraphQL response is not consistent, so these examples may not work in all cases.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "54cd50bf",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip uninstall twitter-api-client -y\n",
|
||||
"# !pip install twitter-api-client --no-cache-dir"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "a3172006",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"import orjson\n",
|
||||
"import pandas as pd\n",
|
||||
"import polars as pl\n",
|
||||
"\n",
|
||||
"from twitter.util import find_key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "4703bee3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def to_int(tdf: pl.LazyFrame, *args) -> pl.LazyFrame:\n",
|
||||
" return tdf.with_columns(pl.col(col).cast(pl.Int64, strict=False).alias(col) for col in args)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def to_dt(tdf: pl.LazyFrame, fmt: str, *args) -> pl.LazyFrame:\n",
|
||||
" return tdf.with_columns(pl.col(col).str.strptime(pl.Datetime, fmt).alias(col) for col in args)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_data(path: Path, expr: str = '', **kwargs) -> dict:\n",
|
||||
" D = {}\n",
|
||||
" for p in path.rglob('*'):\n",
|
||||
" if re.search(expr, p.name, **kwargs):\n",
|
||||
" D.setdefault(p.stem.split('_')[-1], []).append(orjson.loads(p.read_bytes()))\n",
|
||||
" return D"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b0addc33",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"PATH = Path('data/raw')\n",
|
||||
"\n",
|
||||
"# filter for users who favorited or retweeted a tweet\n",
|
||||
"data = get_data(PATH, expr='Favoriters|Retweeters')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09efb374",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### polars"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "e3a70d0e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_user_details(data: dict, cols: list = None, sort: str = 'created_at') -> pl.LazyFrame:\n",
|
||||
" numeric = [\n",
|
||||
" 'fast_followers_count',\n",
|
||||
" 'favourites_count',\n",
|
||||
" 'followers_count',\n",
|
||||
" 'friends_count',\n",
|
||||
" 'listed_count',\n",
|
||||
" 'media_count',\n",
|
||||
" 'normal_followers_count',\n",
|
||||
" 'statuses_count',\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
" D = []\n",
|
||||
" for u in find_key(data, 'user_results'):\n",
|
||||
" x = u.get('result', {})\n",
|
||||
" y = x.get('rest_id')\n",
|
||||
" if z := x.get('legacy', {}):\n",
|
||||
" D.append({'rest_id': y} | z)\n",
|
||||
"\n",
|
||||
" return (\n",
|
||||
" pl.LazyFrame(D)\n",
|
||||
" .unique(subset='rest_id')\n",
|
||||
" .pipe(to_dt, '%a %b %d %H:%M:%S %z %Y', 'created_at')\n",
|
||||
" .pipe(to_int, *numeric)\n",
|
||||
" .sort(sort.strip(\"-\"), descending=\"-\" not in sort)\n",
|
||||
" .select(cols)\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "91495fc2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div><style>\n",
|
||||
".dataframe > thead > tr > th,\n",
|
||||
".dataframe > tbody > tr > td {\n",
|
||||
" text-align: right;\n",
|
||||
"}\n",
|
||||
"</style>\n",
|
||||
"<small>shape: (1855, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>created_at</th><th>screen_name</th><th>followers_count</th></tr><tr><td>datetime[μs, +00:00]</td><td>str</td><td>i64</td></tr></thead><tbody><tr><td>2007-03-31 01:16:45 +00:00</td><td>"TheLos"</td><td>1601</td></tr><tr><td>2008-03-18 19:04:59 +00:00</td><td>"wickedjava"</td><td>2986</td></tr><tr><td>2008-04-17 17:30:21 +00:00</td><td>"needless_input...</td><td>218</td></tr><tr><td>2008-06-27 08:58:13 +00:00</td><td>"DebrisStorm"</td><td>178</td></tr><tr><td>2008-07-26 21:58:07 +00:00</td><td>"daka17"</td><td>66</td></tr><tr><td>2008-09-03 23:27:25 +00:00</td><td>"heyitsaaron"</td><td>1230</td></tr><tr><td>2008-09-11 23:37:14 +00:00</td><td>"marinamiss"</td><td>771</td></tr><tr><td>2008-09-18 13:59:25 +00:00</td><td>"shangrila79"</td><td>229</td></tr><tr><td>2008-10-11 07:18:09 +00:00</td><td>"fridayschild71...</td><td>183</td></tr><tr><td>2008-10-27 19:40:43 +00:00</td><td>"Jacelendrahz"</td><td>188</td></tr><tr><td>2008-11-06 21:50:56 +00:00</td><td>"yolo_pinyato"</td><td>2944</td></tr><tr><td>2008-12-05 07:33:23 +00:00</td><td>"El_Dandy40"</td><td>205</td></tr><tr><td>…</td><td>…</td><td>…</td></tr><tr><td>2023-02-06 15:48:26 +00:00</td><td>"CosmicGhidorah...</td><td>11</td></tr><tr><td>2023-02-08 21:09:17 +00:00</td><td>"backupfHell"</td><td>14</td></tr><tr><td>2023-02-09 19:24:12 +00:00</td><td>"KayFabulous80"</td><td>144</td></tr><tr><td>2023-02-14 04:06:11 +00:00</td><td>"HDBNGRClub"</td><td>3</td></tr><tr><td>2023-02-16 18:38:48 +00:00</td><td>"SladjaMilov14"</td><td>1</td></tr><tr><td>2023-02-17 22:38:58 +00:00</td><td>"c0pas27"</td><td>53</td></tr><tr><td>2023-02-19 06:35:24 +00:00</td><td>"B4NKSCLUB"</td><td>13</td></tr><tr><td>2023-02-19 07:06:15 +00:00</td><td>"Later_Hayter"</td><td>54</td></tr><tr><td>2023-02-21 06:47:49 +00:00</td><td>"hart_kanya"</td><td>2</td></tr><tr><td>2023-02-26 09:43:04 +00:00</td><td>"_Val_Nichole"</td><td>62</td></tr><tr><td>2023-03-04 
23:50:32 +00:00</td><td>"Chublosophy"</td><td>346</td></tr><tr><td>2023-03-05 20:56:30 +00:00</td><td>"Erron_20"</td><td>8</td></tr></tbody></table></div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"shape: (1855, 3)\n",
|
||||
"┌────────────────────────────┬────────────────┬─────────────────┐\n",
|
||||
"│ created_at ┆ screen_name ┆ followers_count │\n",
|
||||
"│ --- ┆ --- ┆ --- │\n",
|
||||
"│ datetime[μs, +00:00] ┆ str ┆ i64 │\n",
|
||||
"╞════════════════════════════╪════════════════╪═════════════════╡\n",
|
||||
"│ 2007-03-31 01:16:45 +00:00 ┆ TheLos ┆ 1601 │\n",
|
||||
"│ 2008-03-18 19:04:59 +00:00 ┆ wickedjava ┆ 2986 │\n",
|
||||
"│ 2008-04-17 17:30:21 +00:00 ┆ needless_input ┆ 218 │\n",
|
||||
"│ 2008-06-27 08:58:13 +00:00 ┆ DebrisStorm ┆ 178 │\n",
|
||||
"│ … ┆ … ┆ … │\n",
|
||||
"│ 2023-02-21 06:47:49 +00:00 ┆ hart_kanya ┆ 2 │\n",
|
||||
"│ 2023-02-26 09:43:04 +00:00 ┆ _Val_Nichole ┆ 62 │\n",
|
||||
"│ 2023-03-04 23:50:32 +00:00 ┆ Chublosophy ┆ 346 │\n",
|
||||
"│ 2023-03-05 20:56:30 +00:00 ┆ Erron_20 ┆ 8 │\n",
|
||||
"└────────────────────────────┴────────────────┴─────────────────┘"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"lf = get_user_details(\n",
|
||||
" data,\n",
|
||||
" cols=['created_at', 'screen_name', 'followers_count'],\n",
|
||||
" sort='-created_at',\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"lf.collect()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "03aa8cc0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### pandas"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "4815e47f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_user_details2(data: dict, cols: list = None, sort: str = 'created_at') -> pd.DataFrame:\n",
|
||||
" D = []\n",
|
||||
" for u in find_key(data, 'user_results'):\n",
|
||||
" x = u.get('result', {})\n",
|
||||
" y = x.get('rest_id')\n",
|
||||
" if z := x.get('legacy', {}):\n",
|
||||
" D.append({'rest_id': y} | z)\n",
|
||||
" df = (\n",
|
||||
" pd.DataFrame(D)\n",
|
||||
" .drop_duplicates('rest_id')\n",
|
||||
" .assign(created_at=lambda x: pd.to_datetime(x['created_at']))\n",
|
||||
" .sort_values(sort.strip('-'), ascending='-' in sort)\n",
|
||||
" .reset_index(drop=True)\n",
|
||||
" )\n",
|
||||
" n = [x for x in df.columns if 'count' in x]\n",
|
||||
" df[n] = df[n].apply(pd.to_numeric, errors='coerce')\n",
|
||||
" return df[cols] if cols else df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "feb0251b",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>created_at</th>\n",
|
||||
" <th>screen_name</th>\n",
|
||||
" <th>followers_count</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2007-03-31 01:16:45+00:00</td>\n",
|
||||
" <td>TheLos</td>\n",
|
||||
" <td>1601</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2008-03-18 19:04:59+00:00</td>\n",
|
||||
" <td>wickedjava</td>\n",
|
||||
" <td>2986</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2008-04-17 17:30:21+00:00</td>\n",
|
||||
" <td>needless_input</td>\n",
|
||||
" <td>218</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2008-06-27 08:58:13+00:00</td>\n",
|
||||
" <td>DebrisStorm</td>\n",
|
||||
" <td>178</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2008-07-26 21:58:07+00:00</td>\n",
|
||||
" <td>daka17</td>\n",
|
||||
" <td>66</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1850</th>\n",
|
||||
" <td>2023-02-19 07:06:15+00:00</td>\n",
|
||||
" <td>Later_Hayter</td>\n",
|
||||
" <td>54</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1851</th>\n",
|
||||
" <td>2023-02-21 06:47:49+00:00</td>\n",
|
||||
" <td>hart_kanya</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1852</th>\n",
|
||||
" <td>2023-02-26 09:43:04+00:00</td>\n",
|
||||
" <td>_Val_Nichole</td>\n",
|
||||
" <td>62</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1853</th>\n",
|
||||
" <td>2023-03-04 23:50:32+00:00</td>\n",
|
||||
" <td>Chublosophy</td>\n",
|
||||
" <td>346</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1854</th>\n",
|
||||
" <td>2023-03-05 20:56:30+00:00</td>\n",
|
||||
" <td>Erron_20</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>1855 rows × 3 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" created_at screen_name followers_count\n",
|
||||
"0 2007-03-31 01:16:45+00:00 TheLos 1601\n",
|
||||
"1 2008-03-18 19:04:59+00:00 wickedjava 2986\n",
|
||||
"2 2008-04-17 17:30:21+00:00 needless_input 218\n",
|
||||
"3 2008-06-27 08:58:13+00:00 DebrisStorm 178\n",
|
||||
"4 2008-07-26 21:58:07+00:00 daka17 66\n",
|
||||
"... ... ... ...\n",
|
||||
"1850 2023-02-19 07:06:15+00:00 Later_Hayter 54\n",
|
||||
"1851 2023-02-21 06:47:49+00:00 hart_kanya 2\n",
|
||||
"1852 2023-02-26 09:43:04+00:00 _Val_Nichole 62\n",
|
||||
"1853 2023-03-04 23:50:32+00:00 Chublosophy 346\n",
|
||||
"1854 2023-03-05 20:56:30+00:00 Erron_20 8\n",
|
||||
"\n",
|
||||
"[1855 rows x 3 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"PATH = Path('data/raw')\n",
|
||||
"\n",
|
||||
"data = get_data(PATH, expr='Favoriters|Retweeters') # filter for users who favorited or retweeted a tweet\n",
|
||||
"\n",
|
||||
"df = get_user_details2(\n",
|
||||
" data,\n",
|
||||
" cols = ['created_at','screen_name','followers_count'],\n",
|
||||
" sort = '-created_at',\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"df"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,353 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"> Note: the structure of the GraphQL response is not consistent, so these examples may not work in all cases."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "85ee96fb4899b369"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "d815a387",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip uninstall twitter-api-client -y\n",
|
||||
"# !pip install twitter-api-client --no-cache-dir"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c1ecf8cb",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from twitter.search import Search\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"email, username, password = ..., ..., ...\n",
|
||||
"search = Search(email, username, password)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "98c65601",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"2023-05-18 21:20:12,075.075 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n",
|
||||
"2023-05-18 21:20:12,656.656 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n",
|
||||
"2023-05-18 21:20:13,452.452 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n",
|
||||
"2023-05-18 21:20:13,899.899 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n",
|
||||
"2023-05-18 21:20:14,539.539 DEBUG: \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n",
|
||||
"2023-05-18 21:20:14,938.938 DEBUG: [\u001B[32msuccess\u001B[0m] returned 101 search results for \u001B[37mjennifer hudson since:2023-05-18\u001B[0m\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>created_at</th>\n",
|
||||
" <th>id</th>\n",
|
||||
" <th>user_id</th>\n",
|
||||
" <th>full_text</th>\n",
|
||||
" <th>lang</th>\n",
|
||||
" <th>user_url</th>\n",
|
||||
" <th>tweet_url</th>\n",
|
||||
" <th>geo</th>\n",
|
||||
" <th>coordinates</th>\n",
|
||||
" <th>place</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2023-05-19 04:07:19+00:00</td>\n",
|
||||
" <td>1659410380026773509</td>\n",
|
||||
" <td>809177430602776576</td>\n",
|
||||
" <td>@msdarlin_ JENNIFER HUDSON first considered al...</td>\n",
|
||||
" <td>en</td>\n",
|
||||
" <td>https://twitter.com/i/user/809177430602776576</td>\n",
|
||||
" <td>https://twitter.com/i/status/1659410380026773509</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2023-05-19 03:50:16+00:00</td>\n",
|
||||
" <td>1659406088578428929</td>\n",
|
||||
" <td>21226048</td>\n",
|
||||
" <td>Jennifer Hudson - Believe https://t.co/vjqlw52MjO</td>\n",
|
||||
" <td>en</td>\n",
|
||||
" <td>https://twitter.com/i/user/21226048</td>\n",
|
||||
" <td>https://twitter.com/i/status/1659406088578428929</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2023-05-19 03:03:12+00:00</td>\n",
|
||||
" <td>1659394245835255808</td>\n",
|
||||
" <td>174826024</td>\n",
|
||||
" <td>If Fantasia and Jennifer Hudson do this verzuz...</td>\n",
|
||||
" <td>en</td>\n",
|
||||
" <td>https://twitter.com/i/user/174826024</td>\n",
|
||||
" <td>https://twitter.com/i/status/1659394245835255808</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2023-05-19 02:44:21+00:00</td>\n",
|
||||
" <td>1659389499221188609</td>\n",
|
||||
" <td>1143382733001039873</td>\n",
|
||||
" <td>jennifer hudson acabou de postar uma foto e no...</td>\n",
|
||||
" <td>pt</td>\n",
|
||||
" <td>https://twitter.com/i/user/1143382733001039873</td>\n",
|
||||
" <td>https://twitter.com/i/status/1659389499221188609</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2023-05-19 02:41:35+00:00</td>\n",
|
||||
" <td>1659388805118578689</td>\n",
|
||||
" <td>1342931884150464512</td>\n",
|
||||
" <td>Jennifer Hudson</td>\n",
|
||||
" <td>cy</td>\n",
|
||||
" <td>https://twitter.com/i/user/1342931884150464512</td>\n",
|
||||
" <td>https://twitter.com/i/status/1659388805118578689</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>96</th>\n",
|
||||
" <td>2023-05-17 20:57:29+00:00</td>\n",
|
||||
" <td>1658939820574400516</td>\n",
|
||||
" <td>534285941</td>\n",
|
||||
" <td>I can’t 🤣🤣 https://t.co/2tiIyHrMb7</td>\n",
|
||||
" <td>en</td>\n",
|
||||
" <td>https://twitter.com/i/user/534285941</td>\n",
|
||||
" <td>https://twitter.com/i/status/1658939820574400516</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>97</th>\n",
|
||||
" <td>2023-05-17 19:46:21+00:00</td>\n",
|
||||
" <td>1658921918890758148</td>\n",
|
||||
" <td>417935020</td>\n",
|
||||
" <td>Idk ask her https://t.co/md7BJf59C2</td>\n",
|
||||
" <td>en</td>\n",
|
||||
" <td>https://twitter.com/i/user/417935020</td>\n",
|
||||
" <td>https://twitter.com/i/status/1658921918890758148</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>98</th>\n",
|
||||
" <td>2023-05-17 19:09:50+00:00</td>\n",
|
||||
" <td>1658912730991009795</td>\n",
|
||||
" <td>2384861195</td>\n",
|
||||
" <td>My best hip hop female Dj @ChainzMsDj Dancing ...</td>\n",
|
||||
" <td>en</td>\n",
|
||||
" <td>https://twitter.com/i/user/2384861195</td>\n",
|
||||
" <td>https://twitter.com/i/status/1658912730991009795</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>99</th>\n",
|
||||
" <td>2023-05-17 16:56:52+00:00</td>\n",
|
||||
" <td>1658879269232320514</td>\n",
|
||||
" <td>15733529</td>\n",
|
||||
" <td>Kelly will sing with D. Smooth\\n\\nThe Complete...</td>\n",
|
||||
" <td>en</td>\n",
|
||||
" <td>https://twitter.com/i/user/15733529</td>\n",
|
||||
" <td>https://twitter.com/i/status/1658879269232320514</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>100</th>\n",
|
||||
" <td>2023-05-17 14:20:31+00:00</td>\n",
|
||||
" <td>1658839919278653444</td>\n",
|
||||
" <td>17230018</td>\n",
|
||||
" <td>my dream collab? gimme _____ and ______.</td>\n",
|
||||
" <td>en</td>\n",
|
||||
" <td>https://twitter.com/i/user/17230018</td>\n",
|
||||
" <td>https://twitter.com/i/status/1658839919278653444</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>101 rows × 10 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" created_at id user_id \n",
|
||||
"0 2023-05-19 04:07:19+00:00 1659410380026773509 809177430602776576 \\\n",
|
||||
"1 2023-05-19 03:50:16+00:00 1659406088578428929 21226048 \n",
|
||||
"2 2023-05-19 03:03:12+00:00 1659394245835255808 174826024 \n",
|
||||
"3 2023-05-19 02:44:21+00:00 1659389499221188609 1143382733001039873 \n",
|
||||
"4 2023-05-19 02:41:35+00:00 1659388805118578689 1342931884150464512 \n",
|
||||
".. ... ... ... \n",
|
||||
"96 2023-05-17 20:57:29+00:00 1658939820574400516 534285941 \n",
|
||||
"97 2023-05-17 19:46:21+00:00 1658921918890758148 417935020 \n",
|
||||
"98 2023-05-17 19:09:50+00:00 1658912730991009795 2384861195 \n",
|
||||
"99 2023-05-17 16:56:52+00:00 1658879269232320514 15733529 \n",
|
||||
"100 2023-05-17 14:20:31+00:00 1658839919278653444 17230018 \n",
|
||||
"\n",
|
||||
" full_text lang \n",
|
||||
"0 @msdarlin_ JENNIFER HUDSON first considered al... en \\\n",
|
||||
"1 Jennifer Hudson - Believe https://t.co/vjqlw52MjO en \n",
|
||||
"2 If Fantasia and Jennifer Hudson do this verzuz... en \n",
|
||||
"3 jennifer hudson acabou de postar uma foto e no... pt \n",
|
||||
"4 Jennifer Hudson cy \n",
|
||||
".. ... ... \n",
|
||||
"96 I can’t 🤣🤣 https://t.co/2tiIyHrMb7 en \n",
|
||||
"97 Idk ask her https://t.co/md7BJf59C2 en \n",
|
||||
"98 My best hip hop female Dj @ChainzMsDj Dancing ... en \n",
|
||||
"99 Kelly will sing with D. Smooth\\n\\nThe Complete... en \n",
|
||||
"100 my dream collab? gimme _____ and ______. en \n",
|
||||
"\n",
|
||||
" user_url \n",
|
||||
"0 https://twitter.com/i/user/809177430602776576 \\\n",
|
||||
"1 https://twitter.com/i/user/21226048 \n",
|
||||
"2 https://twitter.com/i/user/174826024 \n",
|
||||
"3 https://twitter.com/i/user/1143382733001039873 \n",
|
||||
"4 https://twitter.com/i/user/1342931884150464512 \n",
|
||||
".. ... \n",
|
||||
"96 https://twitter.com/i/user/534285941 \n",
|
||||
"97 https://twitter.com/i/user/417935020 \n",
|
||||
"98 https://twitter.com/i/user/2384861195 \n",
|
||||
"99 https://twitter.com/i/user/15733529 \n",
|
||||
"100 https://twitter.com/i/user/17230018 \n",
|
||||
"\n",
|
||||
" tweet_url geo coordinates place \n",
|
||||
"0 https://twitter.com/i/status/1659410380026773509 None None None \n",
|
||||
"1 https://twitter.com/i/status/1659406088578428929 None None None \n",
|
||||
"2 https://twitter.com/i/status/1659394245835255808 None None None \n",
|
||||
"3 https://twitter.com/i/status/1659389499221188609 None None None \n",
|
||||
"4 https://twitter.com/i/status/1659388805118578689 None None None \n",
|
||||
".. ... ... ... ... \n",
|
||||
"96 https://twitter.com/i/status/1658939820574400516 None None None \n",
|
||||
"97 https://twitter.com/i/status/1658921918890758148 None None None \n",
|
||||
"98 https://twitter.com/i/status/1658912730991009795 None None None \n",
|
||||
"99 https://twitter.com/i/status/1658879269232320514 None None None \n",
|
||||
"100 https://twitter.com/i/status/1658839919278653444 None None None \n",
|
||||
"\n",
|
||||
"[101 rows x 10 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"latest_results = search.run(\n",
|
||||
" 'jennifer hudson since:2023-05-18',\n",
|
||||
" limit=100,\n",
|
||||
" latest=True, # get latest tweets only\n",
|
||||
" retries=3,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"flat_results = [y for x in latest_results for y in x]\n",
|
||||
"data = [r.get('globalObjects', {}).get('tweets', {})for r in flat_results]\n",
|
||||
"base= 'https://twitter.com/i'\n",
|
||||
"\n",
|
||||
"df = (\n",
|
||||
" pd.DataFrame({k:v for d in data for k,v in d.items()})\n",
|
||||
" .T\n",
|
||||
" .assign(created_at = lambda x: pd.to_datetime(x['created_at'], format='%a %b %d %H:%M:%S %z %Y'))\n",
|
||||
" .assign(user_url = lambda x: f\"{base}/user/\"+x['user_id_str'])\n",
|
||||
" .assign(tweet_url = lambda x: f\"{base}/status/\"+x['id_str'] )\n",
|
||||
" .sort_values('created_at',ascending=False)\n",
|
||||
" .drop_duplicates('id')\n",
|
||||
" .reset_index(drop=True)\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# sample df with a few cols of interest\n",
|
||||
"sample = df[['created_at','id','user_id','full_text', 'lang',\n",
|
||||
" 'user_url', 'tweet_url', 'geo', 'coordinates', 'place']]\n",
|
||||
"\n",
|
||||
"sample"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,258 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"> Note: the structure of GraphQL responses is not consistent, so these examples may not work in all cases."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "56e4acd2bbce8025"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "2f27b8db",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# !pip install twitter-api-client==0.9.0"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "7885eecd",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from twitter.scraper import Scraper\n",
|
||||
"from twitter.util import *\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"scraper = Scraper(session=init_session())\n",
|
||||
"\n",
|
||||
"# example tweet\n",
|
||||
"tweet = scraper.tweets_details([1476988122986647553], limit=500, pbar=False)\n",
|
||||
"\n",
|
||||
"# unnest items and filter deleted tweets\n",
|
||||
"items = [y for x in find_key(tweet, 'items') for y in x if not find_key(y, 'tombstone')]\n",
|
||||
"\n",
|
||||
"# index into relevant data points\n",
|
||||
"tweet_results = [x.get('result') for x in find_key(items, 'tweet_results')]\n",
|
||||
"\n",
|
||||
"df = (\n",
|
||||
" pd.json_normalize(tweet_results)\n",
|
||||
" # remove duplicate replies if needed\n",
|
||||
" .drop_duplicates('rest_id')\n",
|
||||
" # clean up column names for illustrative purposes\n",
|
||||
" .assign(date=lambda x: pd.to_datetime(x['legacy.created_at'], format='%a %b %d %H:%M:%S %z %Y').dt.strftime(\"%Y-%m-%d %H:%M:%S\"))\n",
|
||||
" .assign(root_tweet=lambda x: x['legacy.conversation_id_str'])\n",
|
||||
" .assign(text=lambda x: x['legacy.full_text'])\n",
|
||||
" .assign(tweet=lambda x: x['rest_id'])\n",
|
||||
" .assign(username=lambda x: x['core.user_results.result.legacy.screen_name'])\n",
|
||||
" # sort by newest replies to root_tweet\n",
|
||||
" .sort_values('date', ascending=False)\n",
|
||||
" .reset_index(drop=True)\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "f5ddad65",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>date</th>\n",
|
||||
" <th>username</th>\n",
|
||||
" <th>tweet</th>\n",
|
||||
" <th>text</th>\n",
|
||||
" <th>root_tweet</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2023-05-25 19:52:44</td>\n",
|
||||
" <td>dtweedys</td>\n",
|
||||
" <td>1661822628393254913</td>\n",
|
||||
" <td>@MKBHD 🤺🔸💜 #SubTweets ftw 🔸🌑🕊️</td>\n",
|
||||
" <td>1476988122986647553</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2023-05-07 14:30:30</td>\n",
|
||||
" <td>BallSchlonger</td>\n",
|
||||
" <td>1655218553858015233</td>\n",
|
||||
" <td>@jacknft_4 @MKBHD sorry i cant</td>\n",
|
||||
" <td>1476988122986647553</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2023-05-03 21:13:33</td>\n",
|
||||
" <td>jacknft_4</td>\n",
|
||||
" <td>1653870434075410436</td>\n",
|
||||
" <td>@MKBHD Great! Dm me please</td>\n",
|
||||
" <td>1476988122986647553</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2023-05-02 17:38:40</td>\n",
|
||||
" <td>tanmayshah28</td>\n",
|
||||
" <td>1653453969476456450</td>\n",
|
||||
" <td>@MKBHD Thank you! Lets too!</td>\n",
|
||||
" <td>1476988122986647553</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2023-04-02 14:08:55</td>\n",
|
||||
" <td>iStevenPlays</td>\n",
|
||||
" <td>1642529545415712768</td>\n",
|
||||
" <td>@MKBHD I got you.\\n\\n‘🔥__🔥’</td>\n",
|
||||
" <td>1476988122986647553</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>199</th>\n",
|
||||
" <td>2021-12-31 18:46:59</td>\n",
|
||||
" <td>sbeams</td>\n",
|
||||
" <td>1476988269187448832</td>\n",
|
||||
" <td>@MKBHD https://t.co/1Fu5cKmPTn</td>\n",
|
||||
" <td>1476988122986647553</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>200</th>\n",
|
||||
" <td>2021-12-31 18:46:48</td>\n",
|
||||
" <td>woworeoo</td>\n",
|
||||
" <td>1476988222609793033</td>\n",
|
||||
" <td>@MKBHD Yeah Yeah no more tweets from you</td>\n",
|
||||
" <td>1476988122986647553</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>201</th>\n",
|
||||
" <td>2021-12-31 18:46:46</td>\n",
|
||||
" <td>disisjorj</td>\n",
|
||||
" <td>1476988217576640517</td>\n",
|
||||
" <td>@MKBHD @vassizzle</td>\n",
|
||||
" <td>1476988122986647553</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>202</th>\n",
|
||||
" <td>2021-12-31 18:46:46</td>\n",
|
||||
" <td>TechWizYT</td>\n",
|
||||
" <td>1476988215043276802</td>\n",
|
||||
" <td>@MKBHD Great advice!</td>\n",
|
||||
" <td>1476988122986647553</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>203</th>\n",
|
||||
" <td>2021-12-31 18:46:41</td>\n",
|
||||
" <td>0x_flea</td>\n",
|
||||
" <td>1476988197146185730</td>\n",
|
||||
" <td>@MKBHD ❤️🤘🏼</td>\n",
|
||||
" <td>1476988122986647553</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>204 rows × 5 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" date username tweet \n",
|
||||
"0 2023-05-25 19:52:44 dtweedys 1661822628393254913 \\\n",
|
||||
"1 2023-05-07 14:30:30 BallSchlonger 1655218553858015233 \n",
|
||||
"2 2023-05-03 21:13:33 jacknft_4 1653870434075410436 \n",
|
||||
"3 2023-05-02 17:38:40 tanmayshah28 1653453969476456450 \n",
|
||||
"4 2023-04-02 14:08:55 iStevenPlays 1642529545415712768 \n",
|
||||
".. ... ... ... \n",
|
||||
"199 2021-12-31 18:46:59 sbeams 1476988269187448832 \n",
|
||||
"200 2021-12-31 18:46:48 woworeoo 1476988222609793033 \n",
|
||||
"201 2021-12-31 18:46:46 disisjorj 1476988217576640517 \n",
|
||||
"202 2021-12-31 18:46:46 TechWizYT 1476988215043276802 \n",
|
||||
"203 2021-12-31 18:46:41 0x_flea 1476988197146185730 \n",
|
||||
"\n",
|
||||
" text root_tweet \n",
|
||||
"0 @MKBHD 🤺🔸💜 #SubTweets ftw 🔸🌑🕊️ 1476988122986647553 \n",
|
||||
"1 @jacknft_4 @MKBHD sorry i cant 1476988122986647553 \n",
|
||||
"2 @MKBHD Great! Dm me please 1476988122986647553 \n",
|
||||
"3 @MKBHD Thank you! Lets too! 1476988122986647553 \n",
|
||||
"4 @MKBHD I got you.\\n\\n‘🔥__🔥’ 1476988122986647553 \n",
|
||||
".. ... ... \n",
|
||||
"199 @MKBHD https://t.co/1Fu5cKmPTn 1476988122986647553 \n",
|
||||
"200 @MKBHD Yeah Yeah no more tweets from you 1476988122986647553 \n",
|
||||
"201 @MKBHD @vassizzle 1476988122986647553 \n",
|
||||
"202 @MKBHD Great advice! 1476988122986647553 \n",
|
||||
"203 @MKBHD ❤️🤘🏼 1476988122986647553 \n",
|
||||
"\n",
|
||||
"[204 rows x 5 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df[[\n",
|
||||
" 'date',\n",
|
||||
" 'username',\n",
|
||||
" 'tweet',\n",
|
||||
" 'text',\n",
|
||||
" 'root_tweet',\n",
|
||||
"]]"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
@@ -1,779 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"> Note: the structure of GraphQL responses is not consistent, so these examples may not work in all cases."
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "4739fa454bb20238"
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "f65b5a54",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"import time\n",
|
||||
"import pandas as pd\n",
|
||||
"\n",
|
||||
"from twitter.scraper import Scraper\n",
|
||||
"from twitter.util import find_key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 62,
|
||||
"id": "1d7714a8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"email, username, password = ..., ..., ...\n",
|
||||
"scraper = Scraper(email, username, password)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tweets = scraper.tweets([33836629]).pop()\n",
|
||||
"tweets_and_replies = scraper.tweets_and_replies([33836629]).pop()"
|
||||
],
|
||||
"metadata": {
|
||||
"collapsed": false
|
||||
},
|
||||
"id": "d9c839bfb7d99004"
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "a1339a2b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Find all unique URLs in a user's tweets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 63,
|
||||
"id": "f64a96d9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"unique_urls = set(find_key(tweets, 'expanded_url'))\n",
|
||||
"unique_urls"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "c8184cd5",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get summary of user tweet data"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 57,
|
||||
"id": "9e87995c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"tweet_data = []\n",
|
||||
"for d in tweets + tweets_and_replies:\n",
|
||||
" instructions = find_key(d, 'instructions').pop()\n",
|
||||
" entries = find_key(instructions, 'entries').pop()\n",
|
||||
" for entry in entries:\n",
|
||||
" legacy = find_key(entry, 'legacy')\n",
|
||||
" tweet_data.extend(legacy)\n",
|
||||
"\n",
|
||||
"user_key = 'can_dm' # filter using arbitrary key that only users have\n",
|
||||
"expr = (x for x in tweet_data if user_key not in x)  # keep tweet dicts only: skip any dict that has the user-only key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 60,
|
||||
"id": "224d5078",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>user_id_str</th>\n",
|
||||
" <th>id_str</th>\n",
|
||||
" <th>created_at</th>\n",
|
||||
" <th>favorite_count</th>\n",
|
||||
" <th>full_text</th>\n",
|
||||
" <th>quote_count</th>\n",
|
||||
" <th>reply_count</th>\n",
|
||||
" <th>retweet_count</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1637213069301649408</td>\n",
|
||||
" <td>2023-03-18 22:03:08+00:00</td>\n",
|
||||
" <td>69.0</td>\n",
|
||||
" <td>@theamazingdrj Yes the integration right into ...</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>1181493805356158978</td>\n",
|
||||
" <td>1637212448674684928</td>\n",
|
||||
" <td>2023-03-18 22:00:40+00:00</td>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>@karpathy How does it compare to using chatGPT...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1637188599967027200</td>\n",
|
||||
" <td>2023-03-18 20:25:54+00:00</td>\n",
|
||||
" <td>13.0</td>\n",
|
||||
" <td>@ErikSchluntz Very likely</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>1374841081293021188</td>\n",
|
||||
" <td>1637183652458283008</td>\n",
|
||||
" <td>2023-03-18 20:06:14+00:00</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>@karpathy Do you think this will work well for...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1637154111333494784</td>\n",
|
||||
" <td>2023-03-18 18:08:51+00:00</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>@aliapanahi logprobs kwarg https://t.co/4Uuh4V...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>219</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1600031572442218497</td>\n",
|
||||
" <td>2022-12-06 07:37:08+00:00</td>\n",
|
||||
" <td>248.0</td>\n",
|
||||
" <td>😂 stop Riley probably up there as someone who ...</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>8.0</td>\n",
|
||||
" <td>12.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>220</th>\n",
|
||||
" <td>16535432</td>\n",
|
||||
" <td>1600012570949058560</td>\n",
|
||||
" <td>2022-12-06 06:21:38+00:00</td>\n",
|
||||
" <td>1698.0</td>\n",
|
||||
" <td>To get a sense of how hyped LLMs are right now...</td>\n",
|
||||
" <td>18.0</td>\n",
|
||||
" <td>47.0</td>\n",
|
||||
" <td>96.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>221</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1593417987687473152</td>\n",
|
||||
" <td>2022-11-18 01:37:07+00:00</td>\n",
|
||||
" <td>206.0</td>\n",
|
||||
" <td>If previous neural nets are special-purpose co...</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>16.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>222</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1528792715810394112</td>\n",
|
||||
" <td>2022-05-23 17:39:21+00:00</td>\n",
|
||||
" <td>3044.0</td>\n",
|
||||
" <td>Something I've been doing for a few years that...</td>\n",
|
||||
" <td>42.0</td>\n",
|
||||
" <td>184.0</td>\n",
|
||||
" <td>115.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>223</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1528453604515778560</td>\n",
|
||||
" <td>2022-05-22 19:11:51+00:00</td>\n",
|
||||
" <td>914.0</td>\n",
|
||||
" <td>real-world data distribution is ~N(0,1)\\ngood ...</td>\n",
|
||||
" <td>11.0</td>\n",
|
||||
" <td>47.0</td>\n",
|
||||
" <td>65.0</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>224 rows × 8 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" user_id_str id_str created_at \\\n",
|
||||
"0 33836629 1637213069301649408 2023-03-18 22:03:08+00:00 \n",
|
||||
"1 1181493805356158978 1637212448674684928 2023-03-18 22:00:40+00:00 \n",
|
||||
"2 33836629 1637188599967027200 2023-03-18 20:25:54+00:00 \n",
|
||||
"3 1374841081293021188 1637183652458283008 2023-03-18 20:06:14+00:00 \n",
|
||||
"4 33836629 1637154111333494784 2023-03-18 18:08:51+00:00 \n",
|
||||
".. ... ... ... \n",
|
||||
"219 33836629 1600031572442218497 2022-12-06 07:37:08+00:00 \n",
|
||||
"220 16535432 1600012570949058560 2022-12-06 06:21:38+00:00 \n",
|
||||
"221 33836629 1593417987687473152 2022-11-18 01:37:07+00:00 \n",
|
||||
"222 33836629 1528792715810394112 2022-05-23 17:39:21+00:00 \n",
|
||||
"223 33836629 1528453604515778560 2022-05-22 19:11:51+00:00 \n",
|
||||
"\n",
|
||||
" favorite_count full_text \\\n",
|
||||
"0 69.0 @theamazingdrj Yes the integration right into ... \n",
|
||||
"1 9.0 @karpathy How does it compare to using chatGPT... \n",
|
||||
"2 13.0 @ErikSchluntz Very likely \n",
|
||||
"3 6.0 @karpathy Do you think this will work well for... \n",
|
||||
"4 5.0 @aliapanahi logprobs kwarg https://t.co/4Uuh4V... \n",
|
||||
".. ... ... \n",
|
||||
"219 248.0 😂 stop Riley probably up there as someone who ... \n",
|
||||
"220 1698.0 To get a sense of how hyped LLMs are right now... \n",
|
||||
"221 206.0 If previous neural nets are special-purpose co... \n",
|
||||
"222 3044.0 Something I've been doing for a few years that... \n",
|
||||
"223 914.0 real-world data distribution is ~N(0,1)\\ngood ... \n",
|
||||
"\n",
|
||||
" quote_count reply_count retweet_count \n",
|
||||
"0 1.0 6.0 4.0 \n",
|
||||
"1 0.0 2.0 1.0 \n",
|
||||
"2 0.0 1.0 1.0 \n",
|
||||
"3 0.0 1.0 0.0 \n",
|
||||
"4 0.0 1.0 1.0 \n",
|
||||
".. ... ... ... \n",
|
||||
"219 2.0 8.0 12.0 \n",
|
||||
"220 18.0 47.0 96.0 \n",
|
||||
"221 5.0 2.0 16.0 \n",
|
||||
"222 42.0 184.0 115.0 \n",
|
||||
"223 11.0 47.0 65.0 \n",
|
||||
"\n",
|
||||
"[224 rows x 8 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 60,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"## simple subset of relevant tweet fields \n",
|
||||
"cols = [\n",
|
||||
" 'user_id_str',\n",
|
||||
" 'id_str',\n",
|
||||
" 'created_at',\n",
|
||||
" 'favorite_count',\n",
|
||||
" 'full_text',\n",
|
||||
" 'quote_count',\n",
|
||||
" 'reply_count',\n",
|
||||
" 'retweet_count',\n",
|
||||
" # 'retweeted',\n",
|
||||
" # 'conversation_id_str',\n",
|
||||
" # 'favorited',\n",
|
||||
" # 'is_quote_status',\n",
|
||||
" # 'lang',\n",
|
||||
" # 'quoted_status_id_str',\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"df = pd.DataFrame(expr)[cols]\n",
|
||||
"\n",
|
||||
"df['created_at'] = pd.to_datetime(df['created_at'], format=\"%a %b %d %H:%M:%S %z %Y\")\n",
|
||||
"\n",
|
||||
"numeric = [\n",
|
||||
" 'favorite_count',\n",
|
||||
" 'quote_count',\n",
|
||||
" 'reply_count',\n",
|
||||
" 'retweet_count',\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"df[numeric] = df[numeric].apply(pd.to_numeric, errors='coerce')\n",
|
||||
"\n",
|
||||
"## drop duplicates, sort by date\n",
|
||||
"df = (df\n",
|
||||
" .dropna(subset='id_str')\n",
|
||||
" .drop_duplicates(subset='id_str')\n",
|
||||
" .sort_values('created_at', ascending=False)\n",
|
||||
" .reset_index(drop=True)\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"# df.to_feather(f'{time.time_ns()}.feather')\n",
|
||||
"# df.to_parquet(f'{time.time_ns()}.parquet')\n",
|
||||
"df.to_csv(f'{time.time_ns()}.csv', index=False)\n",
|
||||
"\n",
|
||||
"df"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9103413b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Search tweet text"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 56,
|
||||
"id": "401712a3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>user_id_str</th>\n",
|
||||
" <th>id_str</th>\n",
|
||||
" <th>created_at</th>\n",
|
||||
" <th>favorite_count</th>\n",
|
||||
" <th>full_text</th>\n",
|
||||
" <th>quote_count</th>\n",
|
||||
" <th>reply_count</th>\n",
|
||||
" <th>retweet_count</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>9</th>\n",
|
||||
" <td>52667700</td>\n",
|
||||
" <td>1637152715716583424</td>\n",
|
||||
" <td>2023-03-18 18:03:18+00:00</td>\n",
|
||||
" <td>99.0</td>\n",
|
||||
" <td>@karpathy Sometimes I wish people could unders...</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>14</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1637147822482165760</td>\n",
|
||||
" <td>2023-03-18 17:43:52+00:00</td>\n",
|
||||
" <td>325.0</td>\n",
|
||||
" <td>If not careful, fine-tuning collapses entropy ...</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>21.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>17</th>\n",
|
||||
" <td>788533935886077952</td>\n",
|
||||
" <td>1636786608916819968</td>\n",
|
||||
" <td>2023-03-17 17:48:32+00:00</td>\n",
|
||||
" <td>411.0</td>\n",
|
||||
" <td>I finally installed github copilot (better lat...</td>\n",
|
||||
" <td>5.0</td>\n",
|
||||
" <td>15.0</td>\n",
|
||||
" <td>14.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>18</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1636765735627395073</td>\n",
|
||||
" <td>2023-03-17 16:25:35+00:00</td>\n",
|
||||
" <td>22.0</td>\n",
|
||||
" <td>@BlancheMinerva @JosephJacks_ I didn’t work on...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>20</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1636459245184106497</td>\n",
|
||||
" <td>2023-03-16 20:07:42+00:00</td>\n",
|
||||
" <td>1254.0</td>\n",
|
||||
" <td>Less publicized but highly awesome aspect of G...</td>\n",
|
||||
" <td>10.0</td>\n",
|
||||
" <td>38.0</td>\n",
|
||||
" <td>132.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>144</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1620875263700799488</td>\n",
|
||||
" <td>2023-02-01 20:02:31+00:00</td>\n",
|
||||
" <td>10.0</td>\n",
|
||||
" <td>@portisto @trending_repos sad. The way they co...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>145</th>\n",
|
||||
" <td>65629552</td>\n",
|
||||
" <td>1620850430254223360</td>\n",
|
||||
" <td>2023-02-01 18:23:51+00:00</td>\n",
|
||||
" <td>7.0</td>\n",
|
||||
" <td>@trending_repos @karpathy How can a main langu...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>146</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1620811724952866816</td>\n",
|
||||
" <td>2023-02-01 15:50:03+00:00</td>\n",
|
||||
" <td>245.0</td>\n",
|
||||
" <td>@trending_repos wow</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>6.0</td>\n",
|
||||
" <td>4.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>147</th>\n",
|
||||
" <td>1162359127294861314</td>\n",
|
||||
" <td>1620749130556669952</td>\n",
|
||||
" <td>2023-02-01 11:41:19+00:00</td>\n",
|
||||
" <td>2541.0</td>\n",
|
||||
" <td>Trending repository of the month 🏆\\n \\nnanoGP...</td>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>19.0</td>\n",
|
||||
" <td>320.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>150</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1620187595979513857</td>\n",
|
||||
" <td>2023-01-30 22:29:59+00:00</td>\n",
|
||||
" <td>15.0</td>\n",
|
||||
" <td>@hi_tysam It was very nice to read through top...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>151</th>\n",
|
||||
" <td>1615441883672502291</td>\n",
|
||||
" <td>1620185408721256449</td>\n",
|
||||
" <td>2023-01-30 22:21:17+00:00</td>\n",
|
||||
" <td>15.0</td>\n",
|
||||
" <td>@karpathy I'm honored and a bit stunned. Wow, ...</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>178</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1613250487838707712</td>\n",
|
||||
" <td>2023-01-11 19:04:23+00:00</td>\n",
|
||||
" <td>2257.0</td>\n",
|
||||
" <td>Didn't tweet nanoGPT yet (quietly getting it t...</td>\n",
|
||||
" <td>24.0</td>\n",
|
||||
" <td>39.0</td>\n",
|
||||
" <td>303.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>186</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1608895189078380544</td>\n",
|
||||
" <td>2022-12-30 18:37:59+00:00</td>\n",
|
||||
" <td>4356.0</td>\n",
|
||||
" <td>Nice read on reverse engineering of GitHub Cop...</td>\n",
|
||||
" <td>145.0</td>\n",
|
||||
" <td>85.0</td>\n",
|
||||
" <td>555.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>190</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1607791539258003457</td>\n",
|
||||
" <td>2022-12-27 17:32:28+00:00</td>\n",
|
||||
" <td>556.0</td>\n",
|
||||
" <td>Context I realized I have to split up minGPT b...</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>23.0</td>\n",
|
||||
" <td>16.0</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" user_id_str id_str created_at \\\n",
|
||||
"9 52667700 1637152715716583424 2023-03-18 18:03:18+00:00 \n",
|
||||
"14 33836629 1637147822482165760 2023-03-18 17:43:52+00:00 \n",
|
||||
"17 788533935886077952 1636786608916819968 2023-03-17 17:48:32+00:00 \n",
|
||||
"18 33836629 1636765735627395073 2023-03-17 16:25:35+00:00 \n",
|
||||
"20 33836629 1636459245184106497 2023-03-16 20:07:42+00:00 \n",
|
||||
"144 33836629 1620875263700799488 2023-02-01 20:02:31+00:00 \n",
|
||||
"145 65629552 1620850430254223360 2023-02-01 18:23:51+00:00 \n",
|
||||
"146 33836629 1620811724952866816 2023-02-01 15:50:03+00:00 \n",
|
||||
"147 1162359127294861314 1620749130556669952 2023-02-01 11:41:19+00:00 \n",
|
||||
"150 33836629 1620187595979513857 2023-01-30 22:29:59+00:00 \n",
|
||||
"151 1615441883672502291 1620185408721256449 2023-01-30 22:21:17+00:00 \n",
|
||||
"178 33836629 1613250487838707712 2023-01-11 19:04:23+00:00 \n",
|
||||
"186 33836629 1608895189078380544 2022-12-30 18:37:59+00:00 \n",
|
||||
"190 33836629 1607791539258003457 2022-12-27 17:32:28+00:00 \n",
|
||||
"\n",
|
||||
" favorite_count full_text \\\n",
|
||||
"9 99.0 @karpathy Sometimes I wish people could unders... \n",
|
||||
"14 325.0 If not careful, fine-tuning collapses entropy ... \n",
|
||||
"17 411.0 I finally installed github copilot (better lat... \n",
|
||||
"18 22.0 @BlancheMinerva @JosephJacks_ I didn’t work on... \n",
|
||||
"20 1254.0 Less publicized but highly awesome aspect of G... \n",
|
||||
"144 10.0 @portisto @trending_repos sad. The way they co... \n",
|
||||
"145 7.0 @trending_repos @karpathy How can a main langu... \n",
|
||||
"146 245.0 @trending_repos wow \n",
|
||||
"147 2541.0 Trending repository of the month 🏆\\n \\nnanoGP... \n",
|
||||
"150 15.0 @hi_tysam It was very nice to read through top... \n",
|
||||
"151 15.0 @karpathy I'm honored and a bit stunned. Wow, ... \n",
|
||||
"178 2257.0 Didn't tweet nanoGPT yet (quietly getting it t... \n",
|
||||
"186 4356.0 Nice read on reverse engineering of GitHub Cop... \n",
|
||||
"190 556.0 Context I realized I have to split up minGPT b... \n",
|
||||
"\n",
|
||||
" quote_count reply_count retweet_count \n",
|
||||
"9 2.0 1.0 5.0 \n",
|
||||
"14 5.0 9.0 21.0 \n",
|
||||
"17 5.0 15.0 14.0 \n",
|
||||
"18 0.0 4.0 1.0 \n",
|
||||
"20 10.0 38.0 132.0 \n",
|
||||
"144 0.0 1.0 2.0 \n",
|
||||
"145 0.0 4.0 0.0 \n",
|
||||
"146 0.0 6.0 4.0 \n",
|
||||
"147 9.0 19.0 320.0 \n",
|
||||
"150 0.0 1.0 2.0 \n",
|
||||
"151 0.0 3.0 0.0 \n",
|
||||
"178 24.0 39.0 303.0 \n",
|
||||
"186 145.0 85.0 555.0 \n",
|
||||
"190 2.0 23.0 16.0 "
|
||||
]
|
||||
},
|
||||
"execution_count": 56,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df[df.full_text.str.contains('repos?i?|github', regex=True, flags=re.I)]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"id": "96ebc3fd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>user_id_str</th>\n",
|
||||
" <th>id_str</th>\n",
|
||||
" <th>created_at</th>\n",
|
||||
" <th>favorite_count</th>\n",
|
||||
" <th>full_text</th>\n",
|
||||
" <th>quote_count</th>\n",
|
||||
" <th>reply_count</th>\n",
|
||||
" <th>retweet_count</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>58</th>\n",
|
||||
" <td>1615441883672502291</td>\n",
|
||||
" <td>1632577588529954819</td>\n",
|
||||
" <td>2023-03-06 03:03:23+00:00</td>\n",
|
||||
" <td>91.0</td>\n",
|
||||
" <td>Speed up your LLM research exploration with a ...</td>\n",
|
||||
" <td>2.0</td>\n",
|
||||
" <td>3.0</td>\n",
|
||||
" <td>14.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>143</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1621578354024677377</td>\n",
|
||||
" <td>2023-02-03 18:36:21+00:00</td>\n",
|
||||
" <td>5276.0</td>\n",
|
||||
" <td>The most dramatic optimization to nanoGPT so f...</td>\n",
|
||||
" <td>57.0</td>\n",
|
||||
" <td>89.0</td>\n",
|
||||
" <td>353.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>147</th>\n",
|
||||
" <td>1162359127294861314</td>\n",
|
||||
" <td>1620749130556669952</td>\n",
|
||||
" <td>2023-02-01 11:41:19+00:00</td>\n",
|
||||
" <td>2541.0</td>\n",
|
||||
" <td>Trending repository of the month 🏆\\n \\nnanoGP...</td>\n",
|
||||
" <td>9.0</td>\n",
|
||||
" <td>19.0</td>\n",
|
||||
" <td>320.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>172</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1615398117683388417</td>\n",
|
||||
" <td>2023-01-17 17:18:18+00:00</td>\n",
|
||||
" <td>21166.0</td>\n",
|
||||
" <td>🔥 New (1h56m) video lecture: \"Let's build GPT:...</td>\n",
|
||||
" <td>331.0</td>\n",
|
||||
" <td>546.0</td>\n",
|
||||
" <td>3321.0</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>178</th>\n",
|
||||
" <td>33836629</td>\n",
|
||||
" <td>1613250487838707712</td>\n",
|
||||
" <td>2023-01-11 19:04:23+00:00</td>\n",
|
||||
" <td>2257.0</td>\n",
|
||||
" <td>Didn't tweet nanoGPT yet (quietly getting it t...</td>\n",
|
||||
" <td>24.0</td>\n",
|
||||
" <td>39.0</td>\n",
|
||||
" <td>303.0</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" user_id_str id_str created_at \\\n",
|
||||
"58 1615441883672502291 1632577588529954819 2023-03-06 03:03:23+00:00 \n",
|
||||
"143 33836629 1621578354024677377 2023-02-03 18:36:21+00:00 \n",
|
||||
"147 1162359127294861314 1620749130556669952 2023-02-01 11:41:19+00:00 \n",
|
||||
"172 33836629 1615398117683388417 2023-01-17 17:18:18+00:00 \n",
|
||||
"178 33836629 1613250487838707712 2023-01-11 19:04:23+00:00 \n",
|
||||
"\n",
|
||||
" favorite_count full_text \\\n",
|
||||
"58 91.0 Speed up your LLM research exploration with a ... \n",
|
||||
"143 5276.0 The most dramatic optimization to nanoGPT so f... \n",
|
||||
"147 2541.0 Trending repository of the month 🏆\\n \\nnanoGP... \n",
|
||||
"172 21166.0 🔥 New (1h56m) video lecture: \"Let's build GPT:... \n",
|
||||
"178 2257.0 Didn't tweet nanoGPT yet (quietly getting it t... \n",
|
||||
"\n",
|
||||
" quote_count reply_count retweet_count \n",
|
||||
"58 2.0 3.0 14.0 \n",
|
||||
"143 57.0 89.0 353.0 \n",
|
||||
"147 9.0 19.0 320.0 \n",
|
||||
"172 331.0 546.0 3321.0 \n",
|
||||
"178 24.0 39.0 303.0 "
|
||||
]
|
||||
},
|
||||
"execution_count": 55,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"flags = re.I\n",
|
||||
"\n",
|
||||
"(df\n",
|
||||
" .query('full_text.str.contains(\"nanogpt\", regex=True, flags=@flags)', engine='python')\n",
|
||||
" # .query(...)\n",
|
||||
" # .query(...)\n",
|
||||
" )"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user