1
0
mirror of synced 2026-01-18 06:04:45 -05:00
Files
airbyte/airbyte-integrations/bases/source-acceptance-test/source_acceptance_test/utils/asserts.py
Alexandre Girard 3894134d11 Bump year in license short to 2022 (#13191)
* Bump to 2022

* format
2022-05-25 17:56:49 -07:00

65 lines
2.1 KiB
Python

#
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#
import logging
import re
from collections import defaultdict
from typing import List, Mapping
import pendulum
from airbyte_cdk.models import AirbyteRecordMessage, ConfiguredAirbyteCatalog
from jsonschema import Draft7Validator, FormatChecker, FormatError, ValidationError
# fmt: off
# Loose shape check for date-time strings: a numeric date, a whitespace or "T"
# separator, a time with optional fractional seconds, and then any trailing
# text (where a timezone designator would go). The fractional-seconds
# separator is an escaped literal dot so that the optional group only ever
# captures a real ".ddd" fraction; whatever else follows the seconds is still
# accepted by the trailing ".*".
timestamp_regex = re.compile((r"^\d{4}-\d?\d-\d?\d"  # date
                              r"(\s|T)"  # separator
                              r"\d?\d:\d?\d:\d?\d(\.\d+)?"  # time
                              r".*$"))  # timezone
# fmt: on
class CustomFormatChecker(FormatChecker):
    """Format checker with a custom rule for the "date-time" format.

    String instances declared as "date-time" are accepted when they match
    ``timestamp_regex`` and can be parsed by ``pendulum.parse`` in non-strict
    mode. Every other instance/format combination is handed to the parent
    ``FormatChecker`` unchanged.
    """

    @staticmethod
    def check_datetime(value: str) -> bool:
        """Tell whether ``value`` is an acceptable date-time string.

        :param value: the string to check
        :return: True when ``value`` matches ``timestamp_regex`` and
            ``pendulum.parse(value, strict=False)`` does not raise
            ``ValueError``; False otherwise
        """
        # Cheap shape check first; a string that does not even look like a
        # timestamp does not need to go through the parser.
        if timestamp_regex.match(value) is None:
            return False
        try:
            pendulum.parse(value, strict=False)
        except ValueError:
            return False
        return True

    def check(self, instance, format):
        """Validate ``instance`` against ``format``.

        :param instance: the value taken from the record being validated
        :param format: the name of the format declared in the schema
        :raises FormatError: when a "date-time" string fails ``check_datetime``
        """
        # Only strings go through the custom check: both the regex and the
        # parser require a str, so any other type (including None) would raise
        # TypeError here instead of being reported as a validation problem.
        # Non-string instances are left to the parent checker.
        if isinstance(instance, str) and format == "date-time":
            if not self.check_datetime(instance):
                raise FormatError(f"{instance} has invalid datetime format")
        else:
            return super().check(instance, format)
def verify_records_schema(
    records: List[AirbyteRecordMessage], catalog: ConfiguredAirbyteCatalog
) -> Mapping[str, Mapping[str, ValidationError]]:
    """Validate records against the JSON schemas of their streams in the catalog.

    A Draft 7 validator using ``CustomFormatChecker`` is built for every stream
    of the catalog, keyed by stream name. Each record is validated with the
    validator of its stream; a record whose stream has no validator is logged
    as an error and skipped.

    :param records: record messages to validate
    :param catalog: configured catalog providing the stream names and schemas
    :return: mapping of stream name to a mapping of schema path (as a string)
        to the validation error found at that path. When several records fail
        at the same schema path of a stream, the error of the record processed
        last is the one kept. Streams without errors have no entry.
    """
    validators_by_stream = {
        configured.stream.name: Draft7Validator(configured.stream.json_schema, format_checker=CustomFormatChecker())
        for configured in catalog.streams
    }

    errors_by_stream = defaultdict(dict)
    for message in records:
        stream_name = message.stream
        schema_validator = validators_by_stream.get(stream_name)
        if schema_validator:
            # Errors are keyed by schema path, so a later error at the same
            # path replaces the one stored before it.
            for problem in schema_validator.iter_errors(message.data):
                errors_by_stream[stream_name][str(problem.schema_path)] = problem
        else:
            logging.error(f"Record from the {message.stream} stream that is not in the catalog.")
    return errors_by_stream