mirror of
https://github.com/Azure/MachineLearningNotebooks.git
synced 2025-12-19 17:17:04 -05:00
31 lines
1.1 KiB
Python
31 lines
1.1 KiB
Python
# import packages
|
|
import os
|
|
import pandas as pd
|
|
from calendar import monthrange
|
|
from datetime import datetime, timedelta
|
|
from azureml.core import Dataset, Datastore, Workspace
|
|
from azureml.opendatasets import NoaaIsdWeather
|
|
|
|
# obtain the workspace handle and its default datastore
ws = Workspace.from_config()
dstore = ws.get_default_datastore()  # not referenced again in this script

# tunable parameters: which years to pull and the first month pulled in each year
target_years = list(range(2010, 2020))
start_month = 1

# fetch one calendar month at a time, keep the FLORIDA stations, save as parquet
year_month_pairs = (
    (year, month)
    for year in target_years
    for month in range(start_month, 12 + 1)
)
for year, month in year_month_pairs:
    # one output directory per year / zero-padded month
    out_dir = 'weather-data/{}/{:02d}/'.format(year, month)
    try:
        # window runs from 00:00 on the 1st of the month to 00:00 on the 1st
        # of the following month (last day of the month plus one day)
        _, days_in_month = monthrange(year, month)
        window_start = datetime(year, month, 1)
        window_end = datetime(year, month, days_in_month) + timedelta(days=1)

        monthly = NoaaIsdWeather(window_start, window_end).to_pandas_dataframe()

        # rows with a missing station name are treated as non-matching
        in_florida = monthly['stationName'].str.contains(
            'FLORIDA', regex=True, na=False
        )
        florida_rows = monthly[in_florida]

        os.makedirs(out_dir, exist_ok=True)
        florida_rows.to_parquet(out_dir + 'data.parquet')
    except Exception as err:
        # best effort: any failure for this month is reported and the loop
        # moves on to the next month
        print('Month {} in year {} likely has no data.\n'.format(month, year))
        print('Exception: {}'.format(err))
|