Adapt Source File connector to JSON files to test normalization (#872)
Fix spec.json for Source File and prepare JSON loader for nested schema testing
This commit is contained in:
@@ -1,7 +1,10 @@
|
||||
{
|
||||
"filename": "integrationTestFile",
|
||||
"format": "csv",
|
||||
"reader_options": "{\"sep\": \",\", \"nrows\": 20}",
|
||||
"storage": "HTTPS",
|
||||
"url": "https://storage.googleapis.com/covid19-open-data/v2/latest/epidemiology.csv",
|
||||
"reader_impl": "gcsfs"
|
||||
"provider": {
|
||||
"storage": "HTTPS",
|
||||
"reader_impl": "gcsfs"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -54,7 +54,7 @@ class TestSourceFile(object):
|
||||
os.remove(tmp_file.name)
|
||||
print(f"\nLocal File {tmp_file.name} is now deleted")
|
||||
|
||||
# @pytest.fixture(scope="class")
|
||||
@pytest.fixture(scope="class")
|
||||
def create_gcs_private_data(self, download_gcs_public_data):
|
||||
storage_client = storage.Client.from_service_account_json(self.service_account_file)
|
||||
bucket_name = create_unique_gcs_bucket(storage_client, self.cloud_bucket_name)
|
||||
@@ -66,7 +66,7 @@ class TestSourceFile(object):
|
||||
bucket.delete(force=True)
|
||||
print(f"\nGCS Bucket {bucket_name} is now deleted")
|
||||
|
||||
# @pytest.fixture(scope="class")
|
||||
@pytest.fixture(scope="class")
|
||||
def create_aws_private_data(self, download_gcs_public_data):
|
||||
with open(self.aws_credentials) as json_file:
|
||||
aws_config = json.load(json_file)
|
||||
@@ -112,27 +112,27 @@ class TestSourceFile(object):
|
||||
config["reader_impl"] = reader_impl
|
||||
run_load_dataframes(config)
|
||||
|
||||
# @pytest.mark.parametrize("reader_impl", ["gcsfs", "smart_open"])
|
||||
# def test_remote_gcs_load(self, create_gcs_private_data, reader_impl):
|
||||
# config = get_config()
|
||||
# config["storage"] = "GCS"
|
||||
# config["url"] = create_gcs_private_data
|
||||
# config["reader_impl"] = reader_impl
|
||||
# with open(self.service_account_file) as json_file:
|
||||
# config["service_account_json"] = json.dumps(json.load(json_file))
|
||||
# run_load_dataframes(config)
|
||||
@pytest.mark.parametrize("reader_impl", ["gcsfs", "smart_open"])
def test_remote_gcs_load(self, create_gcs_private_data, reader_impl):
    """Run the dataframe load against GCS storage for each reader implementation.

    The value supplied by the ``create_gcs_private_data`` fixture is used as
    the URL to read, and the service-account file referenced by
    ``self.service_account_file`` is re-serialized into the config.
    """
    config = get_config()
    config.update(
        {
            "storage": "GCS",
            "url": create_gcs_private_data,
            "reader_impl": reader_impl,
        }
    )
    # Parse and re-dump the credentials so the config carries them as a JSON string.
    with open(self.service_account_file) as credentials_file:
        service_account = json.load(credentials_file)
    config["service_account_json"] = json.dumps(service_account)
    run_load_dataframes(config)
|
||||
|
||||
# @pytest.mark.parametrize("reader_impl", ["s3fs", "smart_open"])
|
||||
# def test_remote_aws_load(self, create_aws_private_data, reader_impl):
|
||||
# config = get_config()
|
||||
# config["storage"] = "S3"
|
||||
# config["url"] = create_aws_private_data
|
||||
# config["reader_impl"] = reader_impl
|
||||
# with open(self.aws_credentials) as json_file:
|
||||
# aws_config = json.load(json_file)
|
||||
# config["aws_access_key_id"] = aws_config["aws_access_key_id"]
|
||||
# config["aws_secret_access_key"] = aws_config["aws_secret_access_key"]
|
||||
# run_load_dataframes(config)
|
||||
@pytest.mark.parametrize("reader_impl", ["s3fs", "smart_open"])
def test_remote_aws_load(self, create_aws_private_data, reader_impl):
    """Run the dataframe load against S3 storage for each reader implementation.

    The value supplied by the ``create_aws_private_data`` fixture is used as
    the URL to read; the access key pair is copied from the JSON file
    referenced by ``self.aws_credentials``.
    """
    config = get_config()
    config.update(
        {
            "storage": "S3",
            "url": create_aws_private_data,
            "reader_impl": reader_impl,
        }
    )
    with open(self.aws_credentials) as credentials_file:
        credentials = json.load(credentials_file)
        # Copy only the two credential fields the loader config needs.
        for key in ("aws_access_key_id", "aws_secret_access_key"):
            config[key] = credentials[key]
    run_load_dataframes(config)
|
||||
|
||||
|
||||
def run_load_dataframes(config):
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"filename": "integrationTestFile",
|
||||
"format": "json",
|
||||
"provider": { "storage": "HTTPS" },
|
||||
"url": "https://think.cs.vt.edu/corgis/datasets/json/airlines/airlines.json"
|
||||
}
|
||||
Reference in New Issue
Block a user