mirror of
https://github.com/trevorhobenshield/twitter-api-client.git
synced 2025-12-25 02:03:15 -05:00
update examples
This commit is contained in:
@@ -1,5 +1,13 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "802043b7",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### polars/pandas examples"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
@@ -24,28 +32,157 @@
|
||||
"import orjson\n",
|
||||
"from pathlib import Path\n",
|
||||
"from twitter.utils import find_key\n",
|
||||
"import pandas as pd"
|
||||
"import pandas as pd\n",
|
||||
"import polars as pl"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "4815e47f",
|
||||
"id": "4703bee3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def to_int(tdf: pl.LazyFrame, *args) -> pl.LazyFrame:\n",
|
||||
" return tdf.with_columns(pl.col(col).cast(pl.Int64, strict=False).alias(col) for col in args)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def to_dt(tdf: pl.LazyFrame, fmt: str, *args) -> pl.LazyFrame:\n",
|
||||
" return tdf.with_columns(pl.col(col).str.strptime(pl.Datetime, fmt).alias(col) for col in args)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def get_data(path: Path, expr: str = '', **kwargs) -> dict:\n",
|
||||
" D = {}\n",
|
||||
" for p in path.rglob('*'):\n",
|
||||
" if re.search(expr, p.name, **kwargs):\n",
|
||||
" D.setdefault(p.stem.split('_')[-1], []).append(orjson.loads(p.read_bytes()))\n",
|
||||
" return D\n",
|
||||
" return D"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b0addc33",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"PATH = Path('data/raw')\n",
|
||||
"\n",
|
||||
"# filter for users who favorited or retweeted a tweet\n",
|
||||
"data = get_data(PATH, expr='Favoriters|Retweeters')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "09efb374",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### polars"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "e3a70d0e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_user_details(data: dict, cols: list = None, sort: str = 'created_at') -> pl.LazyFrame:\n",
|
||||
" numeric = [\n",
|
||||
" 'fast_followers_count',\n",
|
||||
" 'favourites_count',\n",
|
||||
" 'followers_count',\n",
|
||||
" 'friends_count',\n",
|
||||
" 'listed_count',\n",
|
||||
" 'media_count',\n",
|
||||
" 'normal_followers_count',\n",
|
||||
" 'statuses_count',\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"def get_user_details(data: dict, cols: list = None, sort: str = 'created_at') -> pd.DataFrame:\n",
|
||||
" \"\"\"\n",
|
||||
" add \"+\" to sort ascending\n",
|
||||
" \"\"\"\n",
|
||||
" D = []\n",
|
||||
" for u in find_key(data, 'user_results'):\n",
|
||||
" x = u.get('result', {})\n",
|
||||
" y = x.get('rest_id')\n",
|
||||
" if z := x.get('legacy', {}):\n",
|
||||
" D.append({'rest_id': y} | z)\n",
|
||||
"\n",
|
||||
" return (\n",
|
||||
" pl.LazyFrame(D)\n",
|
||||
" .unique(subset='rest_id')\n",
|
||||
" .pipe(to_dt, '%a %b %d %H:%M:%S %z %Y', 'created_at')\n",
|
||||
" .pipe(to_int, *numeric)\n",
|
||||
" .sort(sort.strip(\"-\"), descending=\"-\" not in sort)\n",
|
||||
" .select(cols)\n",
|
||||
" )"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "91495fc2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div><style>\n",
|
||||
".dataframe > thead > tr > th,\n",
|
||||
".dataframe > tbody > tr > td {\n",
|
||||
" text-align: right;\n",
|
||||
"}\n",
|
||||
"</style>\n",
|
||||
"<small>shape: (1855, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>created_at</th><th>screen_name</th><th>followers_count</th></tr><tr><td>datetime[μs, +00:00]</td><td>str</td><td>i64</td></tr></thead><tbody><tr><td>2007-03-31 01:16:45 +00:00</td><td>"TheLos"</td><td>1601</td></tr><tr><td>2008-03-18 19:04:59 +00:00</td><td>"wickedjava"</td><td>2986</td></tr><tr><td>2008-04-17 17:30:21 +00:00</td><td>"needless_input...</td><td>218</td></tr><tr><td>2008-06-27 08:58:13 +00:00</td><td>"DebrisStorm"</td><td>178</td></tr><tr><td>2008-07-26 21:58:07 +00:00</td><td>"daka17"</td><td>66</td></tr><tr><td>2008-09-03 23:27:25 +00:00</td><td>"heyitsaaron"</td><td>1230</td></tr><tr><td>2008-09-11 23:37:14 +00:00</td><td>"marinamiss"</td><td>771</td></tr><tr><td>2008-09-18 13:59:25 +00:00</td><td>"shangrila79"</td><td>229</td></tr><tr><td>2008-10-11 07:18:09 +00:00</td><td>"fridayschild71...</td><td>183</td></tr><tr><td>2008-10-27 19:40:43 +00:00</td><td>"Jacelendrahz"</td><td>188</td></tr><tr><td>2008-11-06 21:50:56 +00:00</td><td>"yolo_pinyato"</td><td>2944</td></tr><tr><td>2008-12-05 07:33:23 +00:00</td><td>"El_Dandy40"</td><td>205</td></tr><tr><td>…</td><td>…</td><td>…</td></tr><tr><td>2023-02-06 15:48:26 +00:00</td><td>"CosmicGhidorah...</td><td>11</td></tr><tr><td>2023-02-08 21:09:17 +00:00</td><td>"backupfHell"</td><td>14</td></tr><tr><td>2023-02-09 19:24:12 +00:00</td><td>"KayFabulous80"</td><td>144</td></tr><tr><td>2023-02-14 04:06:11 +00:00</td><td>"HDBNGRClub"</td><td>3</td></tr><tr><td>2023-02-16 18:38:48 +00:00</td><td>"SladjaMilov14"</td><td>1</td></tr><tr><td>2023-02-17 22:38:58 +00:00</td><td>"c0pas27"</td><td>53</td></tr><tr><td>2023-02-19 06:35:24 +00:00</td><td>"B4NKSCLUB"</td><td>13</td></tr><tr><td>2023-02-19 07:06:15 +00:00</td><td>"Later_Hayter"</td><td>54</td></tr><tr><td>2023-02-21 06:47:49 +00:00</td><td>"hart_kanya"</td><td>2</td></tr><tr><td>2023-02-26 09:43:04 +00:00</td><td>"_Val_Nichole"</td><td>62</td></tr><tr><td>2023-03-04 23:50:32 +00:00</td><td>"Chublosophy"</td><td>346</td></tr><tr><td>2023-03-05 20:56:30 +00:00</td><td>"Erron_20"</td><td>8</td></tr></tbody></table></div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"shape: (1855, 3)\n",
|
||||
"┌────────────────────────────┬────────────────┬─────────────────┐\n",
|
||||
"│ created_at ┆ screen_name ┆ followers_count │\n",
|
||||
"│ --- ┆ --- ┆ --- │\n",
|
||||
"│ datetime[μs, +00:00] ┆ str ┆ i64 │\n",
|
||||
"╞════════════════════════════╪════════════════╪═════════════════╡\n",
|
||||
"│ 2007-03-31 01:16:45 +00:00 ┆ TheLos ┆ 1601 │\n",
|
||||
"│ 2008-03-18 19:04:59 +00:00 ┆ wickedjava ┆ 2986 │\n",
|
||||
"│ 2008-04-17 17:30:21 +00:00 ┆ needless_input ┆ 218 │\n",
|
||||
"│ 2008-06-27 08:58:13 +00:00 ┆ DebrisStorm ┆ 178 │\n",
|
||||
"│ … ┆ … ┆ … │\n",
|
||||
"│ 2023-02-21 06:47:49 +00:00 ┆ hart_kanya ┆ 2 │\n",
|
||||
"│ 2023-02-26 09:43:04 +00:00 ┆ _Val_Nichole ┆ 62 │\n",
|
||||
"│ 2023-03-04 23:50:32 +00:00 ┆ Chublosophy ┆ 346 │\n",
|
||||
"│ 2023-03-05 20:56:30 +00:00 ┆ Erron_20 ┆ 8 │\n",
|
||||
"└────────────────────────────┴────────────────┴─────────────────┘"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"lf = get_user_details(\n",
|
||||
" data,\n",
|
||||
" cols=['created_at', 'screen_name', 'followers_count'],\n",
|
||||
" sort='-created_at',\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"lf.collect()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "03aa8cc0",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### pandas"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "4815e47f",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_user_details2(data: dict, cols: list = None, sort: str = 'created_at') -> pd.DataFrame:\n",
|
||||
" D = []\n",
|
||||
" for u in find_key(data, 'user_results'):\n",
|
||||
" x = u.get('result', {})\n",
|
||||
@@ -56,7 +193,7 @@
|
||||
" pd.DataFrame(D)\n",
|
||||
" .drop_duplicates('rest_id')\n",
|
||||
" .assign(created_at=lambda x: pd.to_datetime(x['created_at']))\n",
|
||||
" .sort_values(sort.strip('+'), ascending='+' in sort)\n",
|
||||
" .sort_values(sort.strip('-'), ascending='-' in sort)\n",
|
||||
" .reset_index(drop=True)\n",
|
||||
" )\n",
|
||||
" n = [x for x in df.columns if 'count' in x]\n",
|
||||
@@ -66,21 +203,9 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 8,
|
||||
"id": "feb0251b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"PATH = Path('data/raw')\n",
|
||||
"\n",
|
||||
"data = get_data(PATH, expr='Favoriters|Retweeters') # filter for users who favorited or retweeted a tweet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "c711659e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
@@ -111,33 +236,33 @@
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>2014-07-14 20:24:30+00:00</td>\n",
|
||||
" <td>larry_deramus</td>\n",
|
||||
" <td>25513</td>\n",
|
||||
" <td>2007-03-31 01:16:45+00:00</td>\n",
|
||||
" <td>TheLos</td>\n",
|
||||
" <td>1601</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>2012-12-06 17:07:13+00:00</td>\n",
|
||||
" <td>OneAhmedSagheer</td>\n",
|
||||
" <td>22092</td>\n",
|
||||
" <td>2008-03-18 19:04:59+00:00</td>\n",
|
||||
" <td>wickedjava</td>\n",
|
||||
" <td>2986</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>2013-08-12 21:56:09+00:00</td>\n",
|
||||
" <td>eriikaswrld</td>\n",
|
||||
" <td>16872</td>\n",
|
||||
" <td>2008-04-17 17:30:21+00:00</td>\n",
|
||||
" <td>needless_input</td>\n",
|
||||
" <td>218</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>2009-04-17 06:02:06+00:00</td>\n",
|
||||
" <td>BrandonEsWolf</td>\n",
|
||||
" <td>15561</td>\n",
|
||||
" <td>2008-06-27 08:58:13+00:00</td>\n",
|
||||
" <td>DebrisStorm</td>\n",
|
||||
" <td>178</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>2009-06-18 23:30:10+00:00</td>\n",
|
||||
" <td>badgalvitoria</td>\n",
|
||||
" <td>14621</td>\n",
|
||||
" <td>2008-07-26 21:58:07+00:00</td>\n",
|
||||
" <td>daka17</td>\n",
|
||||
" <td>66</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
@@ -147,33 +272,33 @@
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1850</th>\n",
|
||||
" <td>2023-01-06 17:18:17+00:00</td>\n",
|
||||
" <td>Jeff62245805</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>2023-02-19 07:06:15+00:00</td>\n",
|
||||
" <td>Later_Hayter</td>\n",
|
||||
" <td>54</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1851</th>\n",
|
||||
" <td>2023-02-16 18:38:48+00:00</td>\n",
|
||||
" <td>SladjaMilov14</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>2023-02-21 06:47:49+00:00</td>\n",
|
||||
" <td>hart_kanya</td>\n",
|
||||
" <td>2</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1852</th>\n",
|
||||
" <td>2022-10-20 23:47:34+00:00</td>\n",
|
||||
" <td>DylonsaurusRex</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-02-26 09:43:04+00:00</td>\n",
|
||||
" <td>_Val_Nichole</td>\n",
|
||||
" <td>62</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1853</th>\n",
|
||||
" <td>2022-10-14 22:10:56+00:00</td>\n",
|
||||
" <td>SanduskyAddison</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-03-04 23:50:32+00:00</td>\n",
|
||||
" <td>Chublosophy</td>\n",
|
||||
" <td>346</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1854</th>\n",
|
||||
" <td>2020-10-11 18:43:54+00:00</td>\n",
|
||||
" <td>Jeff59977360</td>\n",
|
||||
" <td>0</td>\n",
|
||||
" <td>2023-03-05 20:56:30+00:00</td>\n",
|
||||
" <td>Erron_20</td>\n",
|
||||
" <td>8</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
@@ -181,32 +306,36 @@
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" created_at screen_name followers_count\n",
|
||||
"0 2014-07-14 20:24:30+00:00 larry_deramus 25513\n",
|
||||
"1 2012-12-06 17:07:13+00:00 OneAhmedSagheer 22092\n",
|
||||
"2 2013-08-12 21:56:09+00:00 eriikaswrld 16872\n",
|
||||
"3 2009-04-17 06:02:06+00:00 BrandonEsWolf 15561\n",
|
||||
"4 2009-06-18 23:30:10+00:00 badgalvitoria 14621\n",
|
||||
"... ... ... ...\n",
|
||||
"1850 2023-01-06 17:18:17+00:00 Jeff62245805 1\n",
|
||||
"1851 2023-02-16 18:38:48+00:00 SladjaMilov14 1\n",
|
||||
"1852 2022-10-20 23:47:34+00:00 DylonsaurusRex 0\n",
|
||||
"1853 2022-10-14 22:10:56+00:00 SanduskyAddison 0\n",
|
||||
"1854 2020-10-11 18:43:54+00:00 Jeff59977360 0\n",
|
||||
" created_at screen_name followers_count\n",
|
||||
"0 2007-03-31 01:16:45+00:00 TheLos 1601\n",
|
||||
"1 2008-03-18 19:04:59+00:00 wickedjava 2986\n",
|
||||
"2 2008-04-17 17:30:21+00:00 needless_input 218\n",
|
||||
"3 2008-06-27 08:58:13+00:00 DebrisStorm 178\n",
|
||||
"4 2008-07-26 21:58:07+00:00 daka17 66\n",
|
||||
"... ... ... ...\n",
|
||||
"1850 2023-02-19 07:06:15+00:00 Later_Hayter 54\n",
|
||||
"1851 2023-02-21 06:47:49+00:00 hart_kanya 2\n",
|
||||
"1852 2023-02-26 09:43:04+00:00 _Val_Nichole 62\n",
|
||||
"1853 2023-03-04 23:50:32+00:00 Chublosophy 346\n",
|
||||
"1854 2023-03-05 20:56:30+00:00 Erron_20 8\n",
|
||||
"\n",
|
||||
"[1855 rows x 3 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df = get_user_details(\n",
|
||||
"PATH = Path('/home/x/PycharmProjects/twitter-api-client/_test/data')\n",
|
||||
"\n",
|
||||
"data = get_data(PATH, expr='Favoriters|Retweeters') # filter for users who favorited or retweeted a tweet\n",
|
||||
"\n",
|
||||
"df = get_user_details2(\n",
|
||||
" data,\n",
|
||||
" cols = ['created_at','screen_name','followers_count'],\n",
|
||||
" sort = 'followers_count',\n",
|
||||
" sort = '-created_at',\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"df"
|
||||
|
||||
Reference in New Issue
Block a user