MachineLearningNotebooks/how-to-use-azureml/work-with-data/datadrift-tutorial/get_data.py

# import packages
import os
import pandas as pd
from calendar import monthrange
from datetime import datetime, timedelta
from azureml.core import Dataset, Datastore, Workspace
from azureml.opendatasets import NoaaIsdWeather

# get workspace and datastore
ws = Workspace.from_config()
dstore = ws.get_default_datastore()

# adjust parameters as needed
target_years = list(range(2010, 2020))
start_month = 1

# get data
for year in target_years:
    for month in range(start_month, 12 + 1):
        path = 'weather-data/{}/{:02d}/'.format(year, month)
        try:
            start = datetime(year, month, 1)
            end = datetime(year, month, monthrange(year, month)[1]) + timedelta(days=1)
            isd = NoaaIsdWeather(start, end).to_pandas_dataframe()
            isd = isd[isd['stationName'].str.contains('FLORIDA', regex=True, na=False)]
            os.makedirs(path, exist_ok=True)
            isd.to_parquet(path + 'data.parquet')
        except Exception as e:
            print('Month {} in year {} likely has no data.\n'.format(month, year))
            print('Exception: {}'.format(e))