1
0
Mirror of upstream repository, synced 2025-12-25 02:09:19 -05:00

Adapt Source File connector to JSON files to test normalization (#872)

Fix spec.json for Source File and prepare JSON loader for nested schema testing
This commit is contained in:
Christophe Duong
2020-11-11 00:33:39 +01:00
committed by GitHub
parent 199a3a16e8
commit 70f0446677
9 changed files with 247 additions and 161 deletions

View File

@@ -1,7 +1,10 @@
{
"filename": "integrationTestFile",
"format": "csv",
"reader_options": "{\"sep\": \",\", \"nrows\": 20}",
"storage": "HTTPS",
"url": "https://storage.googleapis.com/covid19-open-data/v2/latest/epidemiology.csv",
"reader_impl": "gcsfs"
"provider": {
"storage": "HTTPS",
"reader_impl": "gcsfs"
}
}

View File

@@ -54,7 +54,7 @@ class TestSourceFile(object):
os.remove(tmp_file.name)
print(f"\nLocal File {tmp_file.name} is now deleted")
# @pytest.fixture(scope="class")
@pytest.fixture(scope="class")
def create_gcs_private_data(self, download_gcs_public_data):
storage_client = storage.Client.from_service_account_json(self.service_account_file)
bucket_name = create_unique_gcs_bucket(storage_client, self.cloud_bucket_name)
@@ -66,7 +66,7 @@ class TestSourceFile(object):
bucket.delete(force=True)
print(f"\nGCS Bucket {bucket_name} is now deleted")
# @pytest.fixture(scope="class")
@pytest.fixture(scope="class")
def create_aws_private_data(self, download_gcs_public_data):
with open(self.aws_credentials) as json_file:
aws_config = json.load(json_file)
@@ -112,27 +112,27 @@ class TestSourceFile(object):
config["reader_impl"] = reader_impl
run_load_dataframes(config)
# @pytest.mark.parametrize("reader_impl", ["gcsfs", "smart_open"])
# def test_remote_gcs_load(self, create_gcs_private_data, reader_impl):
# config = get_config()
# config["storage"] = "GCS"
# config["url"] = create_gcs_private_data
# config["reader_impl"] = reader_impl
# with open(self.service_account_file) as json_file:
# config["service_account_json"] = json.dumps(json.load(json_file))
# run_load_dataframes(config)
@pytest.mark.parametrize("reader_impl", ["gcsfs", "smart_open"])
def test_remote_gcs_load(self, create_gcs_private_data, reader_impl):
    """Load dataframes from a private GCS object with each reader implementation."""
    config = get_config()
    config.update(
        {
            "storage": "GCS",
            "url": create_gcs_private_data,
            "reader_impl": reader_impl,
        }
    )
    # Re-serialize the service-account credentials so they are passed as a JSON string.
    with open(self.service_account_file) as fh:
        config["service_account_json"] = json.dumps(json.load(fh))
    run_load_dataframes(config)
# @pytest.mark.parametrize("reader_impl", ["s3fs", "smart_open"])
# def test_remote_aws_load(self, create_aws_private_data, reader_impl):
# config = get_config()
# config["storage"] = "S3"
# config["url"] = create_aws_private_data
# config["reader_impl"] = reader_impl
# with open(self.aws_credentials) as json_file:
# aws_config = json.load(json_file)
# config["aws_access_key_id"] = aws_config["aws_access_key_id"]
# config["aws_secret_access_key"] = aws_config["aws_secret_access_key"]
# run_load_dataframes(config)
@pytest.mark.parametrize("reader_impl", ["s3fs", "smart_open"])
def test_remote_aws_load(self, create_aws_private_data, reader_impl):
    """Load dataframes from a private S3 object with each reader implementation."""
    config = get_config()
    config.update(
        {
            "storage": "S3",
            "url": create_aws_private_data,
            "reader_impl": reader_impl,
        }
    )
    # Copy only the two credential fields the connector expects.
    with open(self.aws_credentials) as fh:
        creds = json.load(fh)
    for key in ("aws_access_key_id", "aws_secret_access_key"):
        config[key] = creds[key]
    run_load_dataframes(config)
def run_load_dataframes(config):

View File

@@ -0,0 +1,6 @@
{
"filename": "integrationTestFile",
"format": "json",
"provider": { "storage": "HTTPS" },
"url": "https://think.cs.vt.edu/corgis/datasets/json/airlines/airlines.json"
}