File cdk parser and cursor updates (#28900)
* File-based CDK: update parquet parser to handle partitions * File-based CDK: make the record output & cursor date time format consistent
This commit is contained in:
@@ -823,7 +823,7 @@ class DatetimeBasedCursor(BaseModel):
|
||||
cursor_datetime_formats: Optional[List[str]] = Field(
|
||||
None,
|
||||
description="The possible formats for the cursor field",
|
||||
title="Cursor Datetime Format",
|
||||
title="Cursor Datetime Formats",
|
||||
)
|
||||
cursor_granularity: Optional[str] = Field(
|
||||
None,
|
||||
|
||||
@@ -4,7 +4,9 @@
|
||||
|
||||
import json
|
||||
import logging
|
||||
from typing import Any, Dict, Iterable, Mapping
|
||||
import os
|
||||
from typing import Any, Dict, Iterable, List, Mapping
|
||||
from urllib.parse import unquote
|
||||
|
||||
import pyarrow as pa
|
||||
import pyarrow.parquet as pq
|
||||
@@ -27,11 +29,16 @@ class ParquetParser(FileTypeParser):
|
||||
if not isinstance(parquet_format, ParquetFormat):
|
||||
raise ValueError(f"Expected ParquetFormat, got {parquet_format}")
|
||||
|
||||
# Pyarrow can detect the schema of a parquet file by reading only its metadata.
|
||||
# https://github.com/apache/arrow/blob/main/python/pyarrow/_parquet.pyx#L1168-L1243
|
||||
parquet_file = pq.ParquetFile(stream_reader.open_file(file, self.file_read_mode, logger))
|
||||
parquet_schema = parquet_file.schema_arrow
|
||||
with stream_reader.open_file(file, self.file_read_mode, logger) as fp:
|
||||
parquet_file = pq.ParquetFile(fp)
|
||||
parquet_schema = parquet_file.schema_arrow
|
||||
|
||||
# Inferred non-partition schema
|
||||
schema = {field.name: ParquetParser.parquet_type_to_schema_type(field.type, parquet_format) for field in parquet_schema}
|
||||
# Inferred partition schema
|
||||
partition_columns = {partition.split("=")[0]: {"type": "string"} for partition in self._extract_partitions(file.uri)}
|
||||
|
||||
schema.update(partition_columns)
|
||||
return schema
|
||||
|
||||
def parse_records(
|
||||
@@ -45,13 +52,18 @@ class ParquetParser(FileTypeParser):
|
||||
if not isinstance(parquet_format, ParquetFormat):
|
||||
raise ValueError(f"Expected ParquetFormat, got {parquet_format}") # FIXME test this branch!
|
||||
with stream_reader.open_file(file, self.file_read_mode, logger) as fp:
|
||||
table = pq.read_table(fp)
|
||||
for batch in table.to_batches():
|
||||
for i in range(batch.num_rows):
|
||||
row_dict = {
|
||||
column: ParquetParser._to_output_value(batch.column(column)[i], parquet_format) for column in table.column_names
|
||||
}
|
||||
yield row_dict
|
||||
reader = pq.ParquetFile(fp)
|
||||
partition_columns = {x.split("=")[0]: x.split("=")[1] for x in self._extract_partitions(file.uri)}
|
||||
for row_group in range(reader.num_row_groups):
|
||||
batch_dict = reader.read_row_group(row_group).to_pydict()
|
||||
for record_values in zip(*batch_dict.values()):
|
||||
record = dict(zip(batch_dict.keys(), record_values))
|
||||
record.update(partition_columns)
|
||||
yield record
|
||||
|
||||
@staticmethod
|
||||
def _extract_partitions(filepath: str) -> List[str]:
|
||||
return [unquote(partition) for partition in filepath.split(os.sep) if "=" in partition]
|
||||
|
||||
@property
|
||||
def file_read_mode(self) -> FileReadMode:
|
||||
|
||||
@@ -48,11 +48,21 @@ class DefaultFileBasedCursor(FileBasedCursor):
|
||||
)
|
||||
|
||||
def get_state(self) -> StreamState:
|
||||
state = {
|
||||
"history": self._file_to_datetime_history,
|
||||
}
|
||||
state = {"history": self._file_to_datetime_history, "_ab_source_file_last_modified": self._get_cursor()}
|
||||
return state
|
||||
|
||||
def _get_cursor(self) -> Optional[str]:
|
||||
"""
|
||||
Returns the cursor value.
|
||||
|
||||
Files are synced in order of last-modified with secondary sort on filename, so the cursor value is
|
||||
a string joining the last-modified timestamp of the last synced file and the name of the file.
|
||||
"""
|
||||
if self._file_to_datetime_history.items():
|
||||
filename, timestamp = max(self._file_to_datetime_history.items(), key=lambda x: (x[1], x[0]))
|
||||
return f"{timestamp}_{filename}"
|
||||
return None
|
||||
|
||||
def _is_history_full(self) -> bool:
|
||||
"""
|
||||
Returns true if the state's history is full, meaning new entries will start to replace old entries.
|
||||
|
||||
@@ -34,6 +34,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
||||
The default file-based stream.
|
||||
"""
|
||||
|
||||
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
|
||||
ab_last_mod_col = "_ab_source_file_last_modified"
|
||||
ab_file_name_col = "_ab_source_file_url"
|
||||
airbyte_columns = [ab_last_mod_col, ab_file_name_col]
|
||||
@@ -78,7 +79,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
||||
parser = self.get_parser(self.config.file_type)
|
||||
for file in stream_slice["files"]:
|
||||
# only serialize the datetime once
|
||||
file_datetime_string = file.last_modified.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
|
||||
n_skipped = line_no = 0
|
||||
|
||||
try:
|
||||
|
||||
@@ -219,7 +219,7 @@ single_avro_scenario = (
|
||||
"data": {
|
||||
"col1": "val11",
|
||||
"col2": 12,
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.avro",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -228,7 +228,7 @@ single_avro_scenario = (
|
||||
"data": {
|
||||
"col1": "val21",
|
||||
"col2": 22,
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.avro",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -282,7 +282,7 @@ multiple_avro_combine_schema_scenario = (
|
||||
"col_double": "20.02",
|
||||
"col_string": "Robbers",
|
||||
"col_album": {"album": "The 1975"},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.avro",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -292,7 +292,7 @@ multiple_avro_combine_schema_scenario = (
|
||||
"col_double": "20.23",
|
||||
"col_string": "Somebody Else",
|
||||
"col_album": {"album": "I Like It When You Sleep, for You Are So Beautiful yet So Unaware of It"},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.avro",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -302,7 +302,7 @@ multiple_avro_combine_schema_scenario = (
|
||||
"col_double": "1975.1975",
|
||||
"col_string": "It's Not Living (If It's Not with You)",
|
||||
"col_song": {"title": "Love It If We Made It"},
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.avro",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -312,7 +312,7 @@ multiple_avro_combine_schema_scenario = (
|
||||
"col_double": "5791.5791",
|
||||
"col_string": "The 1975",
|
||||
"col_song": {"title": "About You"},
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.avro",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -407,7 +407,7 @@ avro_all_types_scenario = (
|
||||
"col_timestamp_micros": "2022-05-30T00:00:00.456789+00:00",
|
||||
"col_local_timestamp_millis": "2022-05-29T00:00:00.456000",
|
||||
"col_local_timestamp_micros": "2022-05-30T00:00:00.456789",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.avro",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -488,7 +488,7 @@ multiple_streams_avro_scenario = (
|
||||
"col_album": "A_MOMENT_APART",
|
||||
"col_year": 2017,
|
||||
"col_vocals": False,
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "odesza_songs.avro",
|
||||
},
|
||||
"stream": "songs_stream",
|
||||
@@ -499,7 +499,7 @@ multiple_streams_avro_scenario = (
|
||||
"col_album": "IN_RETURN",
|
||||
"col_year": 2014,
|
||||
"col_vocals": True,
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "odesza_songs.avro",
|
||||
},
|
||||
"stream": "songs_stream",
|
||||
@@ -510,7 +510,7 @@ multiple_streams_avro_scenario = (
|
||||
"col_album": "THE_LAST_GOODBYE",
|
||||
"col_year": 2022,
|
||||
"col_vocals": True,
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "odesza_songs.avro",
|
||||
},
|
||||
"stream": "songs_stream",
|
||||
@@ -521,7 +521,7 @@ multiple_streams_avro_scenario = (
|
||||
"col_album": "SUMMERS_GONE",
|
||||
"col_year": 2012,
|
||||
"col_vocals": True,
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "odesza_songs.avro",
|
||||
},
|
||||
"stream": "songs_stream",
|
||||
@@ -532,7 +532,7 @@ multiple_streams_avro_scenario = (
|
||||
"col_album": "IN_RETURN",
|
||||
"col_year": 2014,
|
||||
"col_vocals": True,
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "odesza_songs.avro",
|
||||
},
|
||||
"stream": "songs_stream",
|
||||
@@ -542,7 +542,7 @@ multiple_streams_avro_scenario = (
|
||||
"col_name": "Coachella",
|
||||
"col_location": {"country": "USA", "state": "California", "city": "Indio"},
|
||||
"col_attendance": 250000,
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "california_festivals.avro",
|
||||
},
|
||||
"stream": "festivals_stream",
|
||||
@@ -552,7 +552,7 @@ multiple_streams_avro_scenario = (
|
||||
"col_name": "CRSSD",
|
||||
"col_location": {"country": "USA", "state": "California", "city": "San Diego"},
|
||||
"col_attendance": 30000,
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "california_festivals.avro",
|
||||
},
|
||||
"stream": "festivals_stream",
|
||||
@@ -562,7 +562,7 @@ multiple_streams_avro_scenario = (
|
||||
"col_name": "Lightning in a Bottle",
|
||||
"col_location": {"country": "USA", "state": "California", "city": "Buena Vista Lake"},
|
||||
"col_attendance": 18000,
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "california_festivals.avro",
|
||||
},
|
||||
"stream": "festivals_stream",
|
||||
@@ -572,7 +572,7 @@ multiple_streams_avro_scenario = (
|
||||
"col_name": "Outside Lands",
|
||||
"col_location": {"country": "USA", "state": "California", "city": "San Francisco"},
|
||||
"col_attendance": 220000,
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "california_festivals.avro",
|
||||
},
|
||||
"stream": "festivals_stream",
|
||||
@@ -653,7 +653,7 @@ avro_file_with_decimal_as_float_scenario = (
|
||||
"col_double": 20.02,
|
||||
"col_string": "Robbers",
|
||||
"col_album": {"album": "The 1975"},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.avro",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -663,7 +663,7 @@ avro_file_with_decimal_as_float_scenario = (
|
||||
"col_double": 20.23,
|
||||
"col_string": "Somebody Else",
|
||||
"col_album": {"album": "I Like It When You Sleep, for You Are So Beautiful yet So Unaware of It"},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.avro",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -673,7 +673,7 @@ avro_file_with_decimal_as_float_scenario = (
|
||||
"col_double": 1975.1975,
|
||||
"col_string": "It's Not Living (If It's Not with You)",
|
||||
"col_song": {"title": "Love It If We Made It"},
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.avro",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -683,7 +683,7 @@ avro_file_with_decimal_as_float_scenario = (
|
||||
"col_double": 5791.5791,
|
||||
"col_string": "The 1975",
|
||||
"col_song": {"title": "About You"},
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.avro",
|
||||
},
|
||||
"stream": "stream1",
|
||||
|
||||
@@ -255,7 +255,7 @@ single_csv_scenario = (
|
||||
"data": {
|
||||
"col1": "val11",
|
||||
"col2": "val12",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -264,7 +264,7 @@ single_csv_scenario = (
|
||||
"data": {
|
||||
"col1": "val21",
|
||||
"col2": "val22",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -337,7 +337,7 @@ multi_csv_scenario = (
|
||||
"data": {
|
||||
"col1": "val11a",
|
||||
"col2": "val12a",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -346,7 +346,7 @@ multi_csv_scenario = (
|
||||
"data": {
|
||||
"col1": "val21a",
|
||||
"col2": "val22a",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -356,7 +356,7 @@ multi_csv_scenario = (
|
||||
"col1": "val11b",
|
||||
"col2": "val12b",
|
||||
"col3": "val13b",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -366,7 +366,7 @@ multi_csv_scenario = (
|
||||
"col1": "val21b",
|
||||
"col2": "val22b",
|
||||
"col3": "val23b",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -438,7 +438,7 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
|
||||
"data": {
|
||||
"col1": "val11a",
|
||||
"col2": "val12a",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -447,7 +447,7 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
|
||||
"data": {
|
||||
"col1": "val21a",
|
||||
"col2": "val22a",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -457,7 +457,7 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
|
||||
"col1": "val11b",
|
||||
"col2": "val12b",
|
||||
"col3": "val13b",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -467,7 +467,7 @@ multi_csv_stream_n_file_exceeds_limit_for_inference = (
|
||||
"col1": "val21b",
|
||||
"col2": "val22b",
|
||||
"col3": "val23b",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -603,7 +603,7 @@ csv_single_stream_scenario = (
|
||||
"data": {
|
||||
"col1": "val11a",
|
||||
"col2": "val12a",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -612,7 +612,7 @@ csv_single_stream_scenario = (
|
||||
"data": {
|
||||
"col1": "val21a",
|
||||
"col2": "val22a",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -705,7 +705,7 @@ csv_multi_stream_scenario = (
|
||||
"data": {
|
||||
"col1": "val11a",
|
||||
"col2": "val12a",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -714,25 +714,25 @@ csv_multi_stream_scenario = (
|
||||
"data": {
|
||||
"col1": "val21a",
|
||||
"col2": "val22a",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
},
|
||||
{
|
||||
"data": {"col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"},
|
||||
"data": {"col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"},
|
||||
"stream": "stream1",
|
||||
},
|
||||
{
|
||||
"data": {"col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"},
|
||||
"data": {"col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"},
|
||||
"stream": "stream1",
|
||||
},
|
||||
{
|
||||
"data": {"col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"},
|
||||
"data": {"col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"},
|
||||
"stream": "stream2",
|
||||
},
|
||||
{
|
||||
"data": {"col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"},
|
||||
"data": {"col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"},
|
||||
"stream": "stream2",
|
||||
},
|
||||
]
|
||||
@@ -814,7 +814,7 @@ csv_custom_format_scenario = (
|
||||
"col1": "val11",
|
||||
"col2": "val12",
|
||||
"col3": "val |13|",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -824,7 +824,7 @@ csv_custom_format_scenario = (
|
||||
"col1": "val21",
|
||||
"col2": "val22",
|
||||
"col3": "val23",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -834,7 +834,7 @@ csv_custom_format_scenario = (
|
||||
"col1": "val,31",
|
||||
"col2": "val |,32|",
|
||||
"col3": "val, !! 33",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -922,7 +922,7 @@ csv_legacy_format_scenario = (
|
||||
"col1": "val11",
|
||||
"col2": "val12",
|
||||
"col3": "val |13|",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -932,7 +932,7 @@ csv_legacy_format_scenario = (
|
||||
"col1": "val21",
|
||||
"col2": "val22",
|
||||
"col3": "val23",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -942,7 +942,7 @@ csv_legacy_format_scenario = (
|
||||
"col1": "val,31",
|
||||
"col2": "val |,32|",
|
||||
"col3": "val, !! 33",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -1063,7 +1063,7 @@ multi_stream_custom_format = (
|
||||
"data": {
|
||||
"col1": "val11a",
|
||||
"col2": "val ! 12a",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -1072,25 +1072,25 @@ multi_stream_custom_format = (
|
||||
"data": {
|
||||
"col1": "val ! 21a",
|
||||
"col2": "val22a",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
},
|
||||
{
|
||||
"data": {"col3": "val @@@@ 13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"},
|
||||
"data": {"col3": "val @@@@ 13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"},
|
||||
"stream": "stream1",
|
||||
},
|
||||
{
|
||||
"data": {"col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"},
|
||||
"data": {"col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"},
|
||||
"stream": "stream1",
|
||||
},
|
||||
{
|
||||
"data": {"col3": "val @@ 13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"},
|
||||
"data": {"col3": "val @@ 13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"},
|
||||
"stream": "stream2",
|
||||
},
|
||||
{
|
||||
"data": {"col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"},
|
||||
"data": {"col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"},
|
||||
"stream": "stream2",
|
||||
},
|
||||
]
|
||||
@@ -1160,7 +1160,7 @@ empty_schema_inference_scenario = (
|
||||
"data": {
|
||||
"col1": "val11",
|
||||
"col2": "val12",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -1169,7 +1169,7 @@ empty_schema_inference_scenario = (
|
||||
"data": {
|
||||
"col1": "val21",
|
||||
"col2": "val22",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -1241,7 +1241,7 @@ schemaless_csv_scenario = (
|
||||
{
|
||||
"data": {
|
||||
"data": {"col1": "val11a", "col2": "val12a"},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -1249,7 +1249,7 @@ schemaless_csv_scenario = (
|
||||
{
|
||||
"data": {
|
||||
"data": {"col1": "val21a", "col2": "val22a"},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -1257,7 +1257,7 @@ schemaless_csv_scenario = (
|
||||
{
|
||||
"data": {
|
||||
"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b"},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -1265,7 +1265,7 @@ schemaless_csv_scenario = (
|
||||
{
|
||||
"data": {
|
||||
"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b"},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -1357,7 +1357,7 @@ schemaless_csv_multi_stream_scenario = (
|
||||
{
|
||||
"data": {
|
||||
"data": {"col1": "val11a", "col2": "val12a"},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
@@ -1365,17 +1365,17 @@ schemaless_csv_multi_stream_scenario = (
|
||||
{
|
||||
"data": {
|
||||
"data": {"col1": "val21a", "col2": "val22a"},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv",
|
||||
},
|
||||
"stream": "stream1",
|
||||
},
|
||||
{
|
||||
"data": {"col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"},
|
||||
"data": {"col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"},
|
||||
"stream": "stream2",
|
||||
},
|
||||
{
|
||||
"data": {"col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"},
|
||||
"data": {"col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"},
|
||||
"stream": "stream2",
|
||||
},
|
||||
]
|
||||
|
||||
@@ -48,14 +48,15 @@ single_csv_input_state_is_earlier_scenario = (
|
||||
))
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
"history": {
|
||||
"some_old_file.csv": "2023-06-01T03:54:07.000000Z",
|
||||
"a.csv": "2023-06-05T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -139,6 +140,7 @@ single_csv_file_is_skipped_if_same_modified_at_as_in_history = (
|
||||
"history": {
|
||||
"a.csv": "2023-06-05T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -217,13 +219,14 @@ single_csv_file_is_synced_if_modified_at_is_more_recent_than_in_history = (
|
||||
))
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
"history": {
|
||||
"a.csv": "2023-06-05T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -316,13 +319,14 @@ single_csv_no_input_state_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
"history": {
|
||||
"a.csv": "2023-06-05T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_a.csv",
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -401,11 +405,11 @@ multi_csv_same_timestamp_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -413,6 +417,7 @@ multi_csv_same_timestamp_scenario = (
|
||||
"a.csv": "2023-06-05T03:54:07.000000Z",
|
||||
"b.csv": "2023-06-05T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -479,14 +484,15 @@ single_csv_input_state_is_later_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
"history": {
|
||||
"recent_file.csv": "2023-07-15T23:59:59.000000Z",
|
||||
"a.csv": "2023-06-05T03:54:07.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-07-15T23:59:59.000000Z_recent_file.csv",
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -576,18 +582,19 @@ multi_csv_different_timestamps_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-04T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-04T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-04T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-04T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
"history": {
|
||||
"a.csv": "2023-06-04T03:54:07.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-04T03:54:07.000000Z_a.csv",
|
||||
}
|
||||
},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -595,6 +602,7 @@ multi_csv_different_timestamps_scenario = (
|
||||
"a.csv": "2023-06-04T03:54:07.000000Z",
|
||||
"b.csv": "2023-06-05T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -681,11 +689,11 @@ multi_csv_per_timestamp_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -693,11 +701,12 @@ multi_csv_per_timestamp_scenario = (
|
||||
"a.csv": "2023-06-05T03:54:07.000000Z",
|
||||
"b.csv": "2023-06-05T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
||||
}
|
||||
},
|
||||
{"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c", "_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
{"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c", "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c", "_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
{"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c", "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -706,6 +715,7 @@ multi_csv_per_timestamp_scenario = (
|
||||
"b.csv": "2023-06-05T03:54:07.000000Z",
|
||||
"c.csv": "2023-06-06T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
|
||||
}
|
||||
},
|
||||
]
|
||||
@@ -794,9 +804,9 @@ multi_csv_skip_file_if_already_in_history = (
|
||||
[
|
||||
# {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # this file is skipped
|
||||
# {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"}, # this file is skipped
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -804,11 +814,12 @@ multi_csv_skip_file_if_already_in_history = (
|
||||
"a.csv": "2023-06-05T03:54:07.000000Z",
|
||||
"b.csv": "2023-06-05T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_b.csv",
|
||||
}
|
||||
},
|
||||
{"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c", "_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
{"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c", "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c", "_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
{"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c", "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -817,6 +828,7 @@ multi_csv_skip_file_if_already_in_history = (
|
||||
"b.csv": "2023-06-05T03:54:07.000000Z",
|
||||
"c.csv": "2023-06-06T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
|
||||
}
|
||||
},
|
||||
]
|
||||
@@ -914,9 +926,9 @@ multi_csv_include_missing_files_within_history_range = (
|
||||
[
|
||||
# {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # this file is skipped
|
||||
# {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"}, # this file is skipped
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c"}, "stream": "stream1"}, # this file is skipped
|
||||
# {"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c"}, "stream": "stream1"}, # this file is skipped
|
||||
@@ -927,6 +939,7 @@ multi_csv_include_missing_files_within_history_range = (
|
||||
"b.csv": "2023-06-05T03:54:07.000000Z",
|
||||
"c.csv": "2023-06-06T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_c.csv",
|
||||
}
|
||||
},
|
||||
]
|
||||
@@ -1026,8 +1039,8 @@ multi_csv_remove_old_files_if_history_is_full_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-06T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-06T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
"history": {
|
||||
@@ -1035,11 +1048,12 @@ multi_csv_remove_old_files_if_history_is_full_scenario = (
|
||||
"old_file_same_timestamp_as_a.csv": "2023-06-06T03:54:07.000000Z",
|
||||
"a.csv": "2023-06-06T03:54:07.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z_old_file_same_timestamp_as_a.csv",
|
||||
}
|
||||
},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-07T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-07T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -1048,11 +1062,12 @@ multi_csv_remove_old_files_if_history_is_full_scenario = (
|
||||
"a.csv": "2023-06-06T03:54:07.000000Z",
|
||||
"b.csv": "2023-06-07T03:54:07.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-07T03:54:07.000000Z_b.csv",
|
||||
}
|
||||
},
|
||||
{"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c", "_ab_source_file_last_modified": "2023-06-10T03:54:07Z",
|
||||
{"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c", "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c", "_ab_source_file_last_modified": "2023-06-10T03:54:07Z",
|
||||
{"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c", "_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -1061,6 +1076,7 @@ multi_csv_remove_old_files_if_history_is_full_scenario = (
|
||||
"b.csv": "2023-06-07T03:54:07.000000Z",
|
||||
"c.csv": "2023-06-10T03:54:07.000000Z"
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-10T03:54:07.000000Z_c.csv",
|
||||
}
|
||||
},
|
||||
]
|
||||
@@ -1170,19 +1186,19 @@ multi_csv_same_timestamp_more_files_than_history_size_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11c", "col2": "val12c", "col3": "val13c", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21c", "col2": "val22c", "col3": "val23c", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11d", "col2": "val12d", "col3": "val13d", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11d", "col2": "val12d", "col3": "val13d", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "d.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21d", "col2": "val22d", "col3": "val23d", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21d", "col2": "val22d", "col3": "val23d", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "d.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -1191,6 +1207,7 @@ multi_csv_same_timestamp_more_files_than_history_size_scenario = (
|
||||
"c.csv": "2023-06-05T03:54:07.000000Z",
|
||||
"d.csv": "2023-06-05T03:54:07.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -1294,6 +1311,7 @@ multi_csv_sync_recent_files_if_history_is_incomplete_scenario = (
|
||||
"c.csv": "2023-06-05T03:54:07.000000Z",
|
||||
"d.csv": "2023-06-05T03:54:07.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z_d.csv",
|
||||
}
|
||||
}
|
||||
]
|
||||
@@ -1405,9 +1423,9 @@ multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_time
|
||||
[
|
||||
# {"data": {"col1": "val11a", "col2": "val12a"}, "stream": "stream1"}, # This file is skipped because it is older than the time_window
|
||||
# {"data": {"col1": "val21a", "col2": "val22a"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -1416,6 +1434,7 @@ multi_csv_sync_files_within_time_window_if_history_is_incomplete__different_time
|
||||
"d.csv": "2023-06-08T03:54:07.000000Z",
|
||||
"e.csv": "2023-06-08T03:54:07.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_e.csv",
|
||||
}
|
||||
},
|
||||
]
|
||||
@@ -1525,9 +1544,9 @@ multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_differe
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -1536,11 +1555,12 @@ multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_differe
|
||||
"c.csv": "2023-06-07T03:54:07.000000Z",
|
||||
"d.csv": "2023-06-08T03:54:07.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
|
||||
}
|
||||
},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-06T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-06T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{
|
||||
"stream1": {
|
||||
@@ -1549,6 +1569,7 @@ multi_csv_sync_files_within_history_time_window_if_history_is_incomplete_differe
|
||||
"c.csv": "2023-06-07T03:54:07.000000Z",
|
||||
"d.csv": "2023-06-08T03:54:07.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2023-06-08T03:54:07.000000Z_d.csv",
|
||||
}
|
||||
},
|
||||
]
|
||||
|
||||
@@ -64,9 +64,9 @@ single_jsonl_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
@@ -141,13 +141,13 @@ multi_jsonl_with_different_keys_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.jsonl"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
@@ -218,13 +218,13 @@ multi_jsonl_stream_n_file_exceeds_limit_for_inference = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.jsonl"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
@@ -296,13 +296,13 @@ multi_jsonl_stream_n_bytes_exceeds_limit_for_inference = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.jsonl"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
@@ -364,7 +364,7 @@ invalid_jsonl_scenario = (
|
||||
}
|
||||
)
|
||||
.set_expected_records([
|
||||
{"data": {"col1": "val1", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val1", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
])
|
||||
.set_expected_discover_error(SchemaInferenceError, FileBasedSourceError.SCHEMA_INFERENCE_ERROR.value)
|
||||
@@ -475,17 +475,17 @@ jsonl_multi_stream_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": 1, "col2": "record1", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": 1, "col2": "record1", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": 2, "col2": "record2", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": 2, "col2": "record2", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col3": 1.1, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.jsonl"},
|
||||
{"data": {"col3": 1.1, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.jsonl"},
|
||||
"stream": "stream1"},
|
||||
{"data": {"col3": 2.2, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.jsonl"},
|
||||
{"data": {"col3": 2.2, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.jsonl"},
|
||||
"stream": "stream1"},
|
||||
{"data": {"col3": 1.1, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.jsonl"},
|
||||
{"data": {"col3": 1.1, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.jsonl"},
|
||||
"stream": "stream2"},
|
||||
{"data": {"col3": 2.2, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.jsonl"},
|
||||
{"data": {"col3": 2.2, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.jsonl"},
|
||||
"stream": "stream2"},
|
||||
]
|
||||
)
|
||||
@@ -555,13 +555,13 @@ schemaless_jsonl_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"data": {"col1": 1, "col2": "record1"}, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"data": {"col1": 1, "col2": "record1"}, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"data": {"col1": 2, "col2": "record2"}, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"data": {"col1": 2, "col2": "record2"}, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"data": {"col1": 3, "col2": "record3", "col3": 1.1}, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"data": {"col1": 3, "col2": "record3", "col3": 1.1}, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"data": {"col1": 4, "col2": "record4", "col3": 1.1}, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"data": {"col1": 4, "col2": "record4", "col3": 1.1}, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.jsonl"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
@@ -657,13 +657,13 @@ schemaless_jsonl_multi_stream_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"data": {"col1": 1, "col2": "record1"}, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"data": {"col1": 1, "col2": "record1"}, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"data": {"col1": 2, "col2": "record2"}, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"data": {"col1": 2, "col2": "record2"}, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col3": 1.1, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.jsonl"},
|
||||
{"data": {"col3": 1.1, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.jsonl"},
|
||||
"stream": "stream2"},
|
||||
{"data": {"col3": 2.2, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.jsonl"},
|
||||
{"data": {"col3": 2.2, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.jsonl"},
|
||||
"stream": "stream2"},
|
||||
]
|
||||
)
|
||||
@@ -728,9 +728,9 @@ jsonl_user_input_schema_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": 1, "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": 1, "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
{"data": {"col1": 2, "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": 2, "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.jsonl"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
|
||||
@@ -170,9 +170,9 @@ single_parquet_scenario = (
|
||||
.set_file_type("parquet")
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.parquet"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.parquet"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
@@ -258,13 +258,13 @@ multi_parquet_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11a", "col2": "val12a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.parquet"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21a", "col2": "val22a", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.parquet"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.parquet"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.parquet"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
@@ -404,7 +404,7 @@ parquet_various_types_scenario = (
|
||||
"col_list": [1, 2, 3, 4],
|
||||
"col_duration": 12345,
|
||||
"col_binary": "binary string. Hello world!",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
"_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.parquet"}, "stream": "stream1"
|
||||
},
|
||||
]
|
||||
@@ -430,7 +430,7 @@ parquet_file_with_decimal_no_config_scenario = (
|
||||
.set_file_type("parquet")
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "13.00", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "13.00", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.parquet"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
@@ -487,7 +487,7 @@ parquet_file_with_decimal_as_string_scenario = (
|
||||
.set_file_type("parquet")
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "13.00", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "13.00", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.parquet"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
@@ -544,7 +544,7 @@ parquet_file_with_decimal_as_float_scenario = (
|
||||
.set_file_type("parquet")
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": 13.00, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": 13.00, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.parquet"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
@@ -598,7 +598,7 @@ parquet_file_with_decimal_legacy_config_scenario = (
|
||||
.set_file_type("parquet")
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": 13.00, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": 13.00, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.parquet"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
|
||||
@@ -61,9 +61,9 @@ _base_user_input_schema_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11", "col2": "val12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21", "col2": "val22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
@@ -338,18 +338,18 @@ _base_multi_stream_user_input_schema_scenario = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": 21, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11a", "col2": 21, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val12a", "col2": 22, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val12a", "col2": 22, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
# The files in b.csv are emitted despite having an invalid schema
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val11c", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11c", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream3"},
|
||||
{"data": {"col1": "val21c", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21c", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream3"},
|
||||
]
|
||||
)
|
||||
@@ -536,17 +536,17 @@ multi_stream_user_input_schema_scenario_emit_nonconforming_records = (
|
||||
.set_expected_check_error(None, FileBasedSourceError.ERROR_PARSING_USER_PROVIDED_SCHEMA.value)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": 21, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11a", "col2": 21, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val12a", "col2": 22, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val12a", "col2": 22, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "b.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val11c", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11c", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream3"},
|
||||
{"data": {"col1": "val21c", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21c", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream3"},
|
||||
]
|
||||
)
|
||||
@@ -675,17 +675,17 @@ multi_stream_user_input_schema_scenario_skip_nonconforming_records = (
|
||||
.set_expected_check_error(None, FileBasedSourceError.ERROR_PARSING_USER_PROVIDED_SCHEMA.value)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val11a", "col2": 21, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11a", "col2": 21, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val12a", "col2": 22, "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val12a", "col2": 22, "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
# {"data": {"col1": "val11b", "col2": "val12b", "col3": "val13b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
# "_ab_source_file_url": "b.csv"}, "stream": "stream2"},
|
||||
# {"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
# {"data": {"col1": "val21b", "col2": "val22b", "col3": "val23b", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
# "_ab_source_file_url": "b.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val11c", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val11c", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream3"},
|
||||
{"data": {"col1": "val21c", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z",
|
||||
{"data": {"col1": "val21c", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z",
|
||||
"_ab_source_file_url": "c.csv"}, "stream": "stream3"},
|
||||
]
|
||||
)
|
||||
|
||||
@@ -213,14 +213,14 @@ skip_record_scenario_single_stream = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
# {"data": {"col1": "val_a_11", "col2": "val_a_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"}, # This record is skipped because it does not conform
|
||||
# {"data": {"col1": "val_a_12", "col2": "val_a_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"}, # This record is skipped because it does not conform
|
||||
{"data": {"col1": "val_b_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_b_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_c_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_c_12", None: "val_c_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"}, # This record is malformed so should not be emitted
|
||||
{"data": {"col1": "val_c_13", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_d_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "d.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_a_11", "col2": "val_a_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"}, # This record is skipped because it does not conform
|
||||
# {"data": {"col1": "val_a_12", "col2": "val_a_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"}, # This record is skipped because it does not conform
|
||||
{"data": {"col1": "val_b_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_b_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_c_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_c_12", None: "val_c_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"}, # This record is malformed so should not be emitted
|
||||
{"data": {"col1": "val_c_13", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_d_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "d.csv"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
.set_expected_logs({
|
||||
@@ -262,20 +262,20 @@ skip_record_scenario_multi_stream = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
# {"data": {"col1": "val_aa1_11", "col2": "val_aa1_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"}, # This record is skipped because it does not conform
|
||||
# {"data": {"col1": "val_aa1_12", "col2": "val_aa1_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"}, # This record is skipped because it does not conform
|
||||
{"data": {"col1": "val_aa2_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa2_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa3_12", None: "val_aa3_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"}, # This record is malformed so should not be emitted
|
||||
{"data": {"col1": "val_aa3_13", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa4_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a4.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_bb1_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"}, # This record is skipped because it does not conform
|
||||
{"data": {"col1": "val_bb1_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"}, # This record is skipped because it does not conform
|
||||
# {"data": {"col1": "val_bb2_11", "col2": "val_bb2_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"},
|
||||
# {"data": {"col1": "val_bb2_12", "col2": "val_bb2_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb3_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
# {"data": {"col1": "val_aa1_11", "col2": "val_aa1_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"}, # This record is skipped because it does not conform
|
||||
# {"data": {"col1": "val_aa1_12", "col2": "val_aa1_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"}, # This record is skipped because it does not conform
|
||||
{"data": {"col1": "val_aa2_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa2_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa3_12", None: "val_aa3_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"}, # This record is malformed so should not be emitted
|
||||
{"data": {"col1": "val_aa3_13", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa4_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a4.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_bb1_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"}, # This record is skipped because it does not conform
|
||||
{"data": {"col1": "val_bb1_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"}, # This record is skipped because it does not conform
|
||||
# {"data": {"col1": "val_bb2_11", "col2": "val_bb2_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"},
|
||||
# {"data": {"col1": "val_bb2_12", "col2": "val_bb2_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb3_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
]
|
||||
)
|
||||
.set_expected_logs({
|
||||
@@ -314,14 +314,14 @@ emit_record_scenario_single_stream = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val_a_11", "col2": "val_a_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_a_12", "col2": "val_a_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_b_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_b_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_c_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_c_12", None: "val_c_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"}, # This record is malformed so should not be emitted
|
||||
# {"data": {"col1": "val_c_13", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"}, # No more records from this stream are emitted after we hit a parse error
|
||||
# {"data": {"col1": "val_d_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "d.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_a_11", "col2": "val_a_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_a_12", "col2": "val_a_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_b_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_b_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_c_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_c_12", None: "val_c_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"}, # This record is malformed so should not be emitted
|
||||
# {"data": {"col1": "val_c_13", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "c.csv"}, "stream": "stream1"}, # No more records from this stream are emitted after we hit a parse error
|
||||
# {"data": {"col1": "val_d_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "d.csv"}, "stream": "stream1"},
|
||||
]
|
||||
)
|
||||
.set_expected_logs({
|
||||
@@ -359,20 +359,20 @@ emit_record_scenario_multi_stream = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
{"data": {"col1": "val_aa1_11", "col2": "val_aa1_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa1_12", "col2": "val_aa1_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa2_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa2_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa3_12", None: "val_aa3_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"}, # This record is malformed so should not be emitted
|
||||
# {"data": {"col1": "val_aa3_13", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"}, # No more records from this stream are emitted after we hit a parse error
|
||||
# {"data": {"col1": "val_aa4_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a4.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_bb1_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb1_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb2_11", "col2": "val_bb2_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb2_12", "col2": "val_bb2_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb3_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_aa1_11", "col2": "val_aa1_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa1_12", "col2": "val_aa1_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa2_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa2_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_aa3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa3_12", None: "val_aa3_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"}, # This record is malformed so should not be emitted
|
||||
# {"data": {"col1": "val_aa3_13", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"}, # No more records from this stream are emitted after we hit a parse error
|
||||
# {"data": {"col1": "val_aa4_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a4.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_bb1_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb1_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb2_11", "col2": "val_bb2_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb2_12", "col2": "val_bb2_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb3_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
]
|
||||
)
|
||||
.set_expected_logs({
|
||||
@@ -439,20 +439,20 @@ wait_for_rediscovery_scenario_multi_stream = (
|
||||
)
|
||||
.set_expected_records(
|
||||
[
|
||||
# {"data": {"col1": "val_aa1_11", "col2": "val_aa1_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"}, # The first record does not conform so we don't sync anything from this stream
|
||||
# {"data": {"col1": "val_aa1_12", "col2": "val_aa1_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa2_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa2_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa3_12", None: "val_aa3_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa3_13", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa4_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "a/a4.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_bb1_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb1_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"},
|
||||
# {"data": {"col1": "val_bb2_11", "col2": "val_bb2_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"}, # No more records from this stream are emitted after a nonconforming record is encountered
|
||||
# {"data": {"col1": "val_bb2_12", "col2": "val_bb2_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"},
|
||||
# {"data": {"col1": "val_bb3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
# {"data": {"col1": "val_bb3_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
# {"data": {"col1": "val_aa1_11", "col2": "val_aa1_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"}, # The first record does not conform so we don't sync anything from this stream
|
||||
# {"data": {"col1": "val_aa1_12", "col2": "val_aa1_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a1.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa2_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa2_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a2.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa3_12", None: "val_aa3_22", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa3_13", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a3.csv"}, "stream": "stream1"},
|
||||
# {"data": {"col1": "val_aa4_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "a/a4.csv"}, "stream": "stream1"},
|
||||
{"data": {"col1": "val_bb1_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"},
|
||||
{"data": {"col1": "val_bb1_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b1.csv"}, "stream": "stream2"},
|
||||
# {"data": {"col1": "val_bb2_11", "col2": "val_bb2_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"}, # No more records from this stream are emitted after a nonconforming record is encountered
|
||||
# {"data": {"col1": "val_bb2_12", "col2": "val_bb2_21", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b2.csv"}, "stream": "stream2"},
|
||||
# {"data": {"col1": "val_bb3_11", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
# {"data": {"col1": "val_bb3_12", "_ab_source_file_last_modified": "2023-06-05T03:54:07.000000Z", "_ab_source_file_url": "b/b3.csv"}, "stream": "stream2"},
|
||||
]
|
||||
)
|
||||
.set_expected_logs({
|
||||
|
||||
@@ -30,11 +30,14 @@ from freezegun import freeze_time
|
||||
[datetime(2021, 1, 1),
|
||||
datetime(2021, 1, 1),
|
||||
datetime(2020, 12, 31)],
|
||||
{"history": {
|
||||
"a.csv": "2021-01-01T00:00:00.000000Z",
|
||||
"b.csv": "2021-01-02T00:00:00.000000Z",
|
||||
"c.csv": "2020-12-31T00:00:00.000000Z",
|
||||
}, },
|
||||
{
|
||||
"history": {
|
||||
"a.csv": "2021-01-01T00:00:00.000000Z",
|
||||
"b.csv": "2021-01-02T00:00:00.000000Z",
|
||||
"c.csv": "2020-12-31T00:00:00.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2021-01-02T00:00:00.000000Z_b.csv",
|
||||
},
|
||||
id="test_file_start_time_is_earliest_time_in_history"),
|
||||
pytest.param([
|
||||
RemoteFile(uri="a.csv",
|
||||
@@ -55,11 +58,14 @@ from freezegun import freeze_time
|
||||
datetime(2021, 1, 1),
|
||||
datetime(2021, 1, 1),
|
||||
datetime(2021, 1, 2)],
|
||||
{"history": {
|
||||
"b.csv": "2021-01-02T00:00:00.000000Z",
|
||||
"c.csv": "2021-01-03T00:00:00.000000Z",
|
||||
"d.csv": "2021-01-04T00:00:00.000000Z",
|
||||
}, },
|
||||
{
|
||||
"history": {
|
||||
"b.csv": "2021-01-02T00:00:00.000000Z",
|
||||
"c.csv": "2021-01-03T00:00:00.000000Z",
|
||||
"d.csv": "2021-01-04T00:00:00.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2021-01-04T00:00:00.000000Z_d.csv",
|
||||
},
|
||||
id="test_earliest_file_is_removed_from_history_if_history_is_full"),
|
||||
pytest.param([
|
||||
RemoteFile(uri="a.csv",
|
||||
@@ -85,11 +91,14 @@ from freezegun import freeze_time
|
||||
datetime(2021, 1, 2),
|
||||
datetime(2021, 1, 2),
|
||||
],
|
||||
{"history": {
|
||||
"file_with_same_timestamp_as_b.csv": "2021-01-02T00:00:00.000000Z",
|
||||
"c.csv": "2021-01-03T00:00:00.000000Z",
|
||||
"d.csv": "2021-01-04T00:00:00.000000Z",
|
||||
}, },
|
||||
{
|
||||
"history": {
|
||||
"file_with_same_timestamp_as_b.csv": "2021-01-02T00:00:00.000000Z",
|
||||
"c.csv": "2021-01-03T00:00:00.000000Z",
|
||||
"d.csv": "2021-01-04T00:00:00.000000Z",
|
||||
},
|
||||
"_ab_source_file_last_modified": "2021-01-04T00:00:00.000000Z_d.csv",
|
||||
},
|
||||
id="test_files_are_sorted_by_timestamp_and_by_name"),
|
||||
],
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user