1
0
mirror of synced 2026-01-22 19:01:47 -05:00
Files
airbyte/airbyte-integrations/connectors/source-shopify/source_shopify/source.py

923 lines
34 KiB
Python

#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#
import logging
from abc import ABC, abstractmethod
from functools import cached_property
from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
from urllib.parse import parse_qsl, urlparse
import requests
from airbyte_cdk import AirbyteLogger
from airbyte_cdk.sources import AbstractSource
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.streams.http import HttpStream
from requests.exceptions import ConnectionError, InvalidURL, JSONDecodeError, RequestException, SSLError
from .auth import ShopifyAuthenticator
from .graphql import get_query_products
from .transform import DataTypeEnforcer
from .utils import SCOPES_MAPPING, ApiTypeEnum
from .utils import EagerlyCachedStreamState as stream_state_cache
from .utils import ShopifyAccessScopesError, ShopifyBadJsonError, ShopifyConnectionError, ShopifyNonRetryableErrors
from .utils import ShopifyRateLimiter as limiter
from .utils import ShopifyWrongShopNameError
class ShopifyStream(HttpStream, ABC):
    """Base stream for the Shopify REST Admin API.

    Provides URL construction, link-header pagination, rate limiting and
    per-record transformation shared by every concrete stream.
    """

    # Latest Stable Release
    api_version = "2022-10"
    # Page size
    limit = 250
    # Define primary key as sort key for full_refresh, or very first sync for incremental_refresh
    primary_key = "id"
    order_field = "updated_at"
    filter_field = "updated_at_min"
    # define default logger
    logger = logging.getLogger("airbyte")
    raise_on_http_errors = True
    max_retries = 5

    def __init__(self, config: Dict):
        super().__init__(authenticator=config["authenticator"])
        self._transformer = DataTypeEnforcer(self.get_json_schema())
        self.config = config

    @property
    def url_base(self) -> str:
        return f"https://{self.config['shop']}.myshopify.com/admin/api/{self.api_version}/"

    @property
    def default_filter_field_value(self) -> Union[int, str]:
        # certain streams are using `since_id` field as `filter_field`, which requires to use `int` type,
        # but many other use `str` values for this, we determine what to use based on `filter_field` value
        # by default, we use the user defined `Start Date` as initial value, or 0 for `id`-dependent streams.
        return 0 if self.filter_field == "since_id" else self.config["start_date"]

    @staticmethod
    def next_page_token(response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Extract the next-page query params from the RFC 5988 `Link: rel=next` header, if any."""
        next_page = response.links.get("next", None)
        if next_page:
            return dict(parse_qsl(urlparse(next_page.get("url")).query))
        else:
            return None

    def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
        params = {"limit": self.limit}
        if next_page_token:
            # Shopify rejects extra filters once a page_info cursor is used, so only pass pagination params.
            params.update(**next_page_token)
        else:
            params["order"] = f"{self.order_field} asc"
            params[self.filter_field] = self.default_filter_field_value
        return params

    @limiter.balance_rate_limit()
    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        # BUGFIX: compare with `==`, not `is` — identity comparison against an int
        # relies on CPython's small-int caching and is never guaranteed.
        if response.status_code == requests.codes.OK:
            try:
                json_response = response.json()
                records = json_response.get(self.data_field, []) if self.data_field is not None else json_response
                yield from self.produce_records(records)
            except RequestException as e:
                # BUGFIX: corrected typo in the log message (`parse_ersponse` -> `parse_response`).
                self.logger.warning(f"Unexpected error in `parse_response`: {e}, the actual response data: {response.text}")
                yield {}

    def produce_records(self, records: Union[Iterable[Mapping[str, Any]], Mapping[str, Any]] = None) -> Iterable[Mapping[str, Any]]:
        # transform method was implemented according to issue 4841
        # Shopify API returns price fields as a string and it should be converted to number
        # this solution designed to convert string into number, but in future can be modified for general purpose
        if isinstance(records, dict):
            # for cases when we have a single record as dict
            # add shop_url to the record to make querying easy
            records["shop_url"] = self.config["shop"]
            yield self._transformer.transform(records)
        else:
            # for other cases
            for record in records:
                # add shop_url to the record to make querying easy
                record["shop_url"] = self.config["shop"]
                yield self._transformer.transform(record)

    def should_retry(self, response: requests.Response) -> bool:
        """Suppress retries (and HTTP-error raising) for statuses known to be non-retryable for this stream."""
        known_errors = ShopifyNonRetryableErrors(self.name)
        status = response.status_code
        if status in known_errors.keys():
            setattr(self, "raise_on_http_errors", False)
            self.logger.warning(known_errors.get(status))
            return False
        else:
            return super().should_retry(response)

    @property
    @abstractmethod
    def data_field(self) -> str:
        """The name of the field in the response which contains the data"""
class IncrementalShopifyStream(ShopifyStream, ABC):
    """Base for incremental streams: tracks a cursor value per stream and filters requests/records by it."""

    # Setting the check point interval to the limit of the records output
    @property
    def state_checkpoint_interval(self) -> int:
        return super().limit

    # Setting the default cursor field for all streams
    cursor_field = "updated_at"

    @property
    def default_state_comparison_value(self) -> Union[int, str]:
        # certain streams are using `id` field as `cursor_field`, which requires to use `int` type,
        # but many other use `str` values for this, we determine what to use based on `cursor_field` value
        return 0 if self.cursor_field == "id" else ""

    def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return the new state: the max of the latest record's cursor and the current state's cursor.

        `or` (not `.get(..., default)`) is used deliberately so that an explicit
        `None` cursor value also falls back to the type-appropriate default.
        """
        last_record_value = latest_record.get(self.cursor_field) or self.default_state_comparison_value
        current_state_value = current_stream_state.get(self.cursor_field) or self.default_state_comparison_value
        return {self.cursor_field: max(last_record_value, current_state_value)}

    @stream_state_cache.cache_stream_state
    def request_params(self, stream_state: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None, **kwargs):
        params = super().request_params(stream_state=stream_state, next_page_token=next_page_token, **kwargs)
        # If there is a next page token then we should only send pagination-related parameters.
        if not next_page_token:
            params["order"] = f"{self.order_field} asc"
            if stream_state:
                # override the base-class default filter value with the saved cursor
                params[self.filter_field] = stream_state.get(self.cursor_field)
        return params

    # Parse the `stream_slice` with respect to `stream_state` for `Incremental refresh`
    # cases where we slice the stream, the endpoints for those classes don't accept any other filtering,
    # but they provide us with the updated_at field in most cases, so we used that as incremental filtering during the order slicing.
    def filter_records_newer_than_state(self, stream_state: Mapping[str, Any] = None, records_slice: Iterable[Mapping] = None) -> Iterable:
        # Getting records >= state
        if stream_state:
            state_value = stream_state.get(self.cursor_field)
            for record in records_slice:
                if self.cursor_field in record:
                    record_value = record.get(self.cursor_field, self.default_state_comparison_value)
                    if record_value:
                        if record_value >= state_value:
                            yield record
                    else:
                        # old entities could have cursor field in place, but set to null
                        self.logger.warning(
                            f"Stream `{self.name}`, Record ID: `{record.get(self.primary_key)}` cursor value is: {record_value}, record is emitted without state comparison"
                        )
                        yield record
                else:
                    # old entities could miss the cursor field
                    self.logger.warning(
                        f"Stream `{self.name}`, Record ID: `{record.get(self.primary_key)}` missing cursor field: {self.cursor_field}, record is emitted without state comparison"
                    )
                    yield record
        else:
            # no state yet: emit everything unfiltered
            yield from records_slice
class ShopifySubstream(IncrementalShopifyStream):
    """
    ShopifySubstream - provides slicing functionality for streams using parts of data from parent stream.
    For example:
    - `Refunds Orders` is the entity of `Orders`,
    - `OrdersRisks` is the entity of `Orders`,
    - `DiscountCodes` is the entity of `PriceRules`, etc.

    :: @ parent_stream - defines the parent stream object to read from
    :: @ slice_key - defines the name of the property in stream slices dict.
    :: @ nested_record - the name of the field inside of parent stream record. Default is `id`.
    :: @ nested_record_field_name - the name of the field inside of nested_record.
    :: @ nested_substream - the name of the nested entity inside of parent stream, helps to reduce the number of
    API Calls, if present, see `OrderRefunds` stream for more.
    """

    # parent stream class to instantiate and read from
    parent_stream_class: object = None
    # key under which the parent value is exposed in each stream slice
    slice_key: str = None
    nested_record: str = "id"
    nested_record_field_name: str = None
    nested_substream = None
    nested_substream_list_field_id = None

    @cached_property
    def parent_stream(self) -> object:
        """
        Returns the instance of parent stream, if the substream has a `parent_stream_class` dependency.
        """
        return self.parent_stream_class(self.config) if self.parent_stream_class else None

    def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
        """UPDATING THE STATE OBJECT:
        Stream: Transactions
        Parent Stream: Orders
        Returns:
            {
                {...},
                "transactions": {
                    "created_at": "2022-03-03T03:47:45-08:00",
                    "orders": {
                        "updated_at": "2022-03-03T03:47:46-08:00"
                    }
                },
                {...},
            }
        """
        updated_state = super().get_updated_state(current_stream_state, latest_record)
        # add parent_stream_state to `updated_state` (taken from the shared cache
        # populated in `stream_slices` while reading the parent stream)
        updated_state[self.parent_stream.name] = stream_state_cache.cached_state.get(self.parent_stream.name)
        return updated_state

    def request_params(self, next_page_token: Mapping[str, Any] = None, **kwargs) -> MutableMapping[str, Any]:
        # substream endpoints take no server-side filters; only pagination params are sent
        params = {"limit": self.limit}
        if next_page_token:
            params.update(**next_page_token)
        return params

    def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
        """
        Reading the parent stream for slices with structure:
        EXAMPLE: for given nested_record as `id` of Orders,

        Outputs:
            [
                {slice_key: 123},
                {slice_key: 456},
                {...},
                {slice_key: 999}
            ]
        """
        sorted_substream_slices = []

        # reading parent nested stream_state from child stream state
        parent_stream_state = stream_state.get(self.parent_stream.name) if stream_state else {}

        # reading the parent stream
        for record in self.parent_stream.read_records(stream_state=parent_stream_state, **kwargs):
            # updating the `stream_state` with the state of it's parent stream
            # to have the child stream sync independently from the parent stream
            stream_state_cache.cached_state[self.parent_stream.name] = self.parent_stream.get_updated_state({}, record)
            # to limit the number of API Calls and reduce the time of data fetch,
            # we can pull the ready data for child_substream, if nested data is present,
            # and corresponds to the data of child_substream we need.
            if self.nested_substream and self.nested_substream_list_field_id:
                # one slice per element of the nested list; cursor is taken from the
                # first nested element (NOTE(review): assumes nested list shares one cursor — confirm)
                if record.get(self.nested_substream):
                    sorted_substream_slices.extend(
                        [
                            {
                                self.slice_key: sub_record[self.nested_substream_list_field_id],
                                self.cursor_field: record[self.nested_substream][0].get(
                                    self.cursor_field, self.default_state_comparison_value
                                ),
                            }
                            for sub_record in record[self.nested_record]
                        ]
                    )
            elif self.nested_substream:
                # one slice per parent record, but only for parents that actually contain nested data
                if record.get(self.nested_substream):
                    sorted_substream_slices.append(
                        {
                            self.slice_key: record[self.nested_record],
                            self.cursor_field: record[self.nested_substream][0].get(self.cursor_field, self.default_state_comparison_value),
                        }
                    )
            else:
                # plain case: emit a slice per parent record immediately
                yield {self.slice_key: record[self.nested_record]}

        # output slice from sorted list to avoid filtering older records
        if self.nested_substream:
            if len(sorted_substream_slices) > 0:
                # sort by cursor_field
                sorted_substream_slices.sort(key=lambda x: x.get(self.cursor_field))
                for sorted_slice in sorted_substream_slices:
                    yield {self.slice_key: sorted_slice[self.slice_key]}

    def read_records(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Optional[Mapping[str, Any]] = None,
        **kwargs,
    ) -> Iterable[Mapping[str, Any]]:
        """Reading child streams records for each `id`"""

        slice_data = stream_slice.get(self.slice_key)
        # sometimes the stream_slice.get(self.slice_key) has the list of records,
        # to avoid data exposition inside the logs, we should get the data we need correctly out of stream_slice.
        if isinstance(slice_data, list) and self.nested_record_field_name is not None and len(slice_data) > 0:
            slice_data = slice_data[0].get(self.nested_record_field_name)

        self.logger.info(f"Reading {self.name} for {self.slice_key}: {slice_data}")
        records = super().read_records(stream_slice=stream_slice, **kwargs)
        # client-side incremental filtering, since these endpoints accept no filters
        yield from self.filter_records_newer_than_state(stream_state=stream_state, records_slice=records)
class MetafieldShopifySubstream(ShopifySubstream):
    """Base for all `Metafield*` sub-streams: reads metafields attached to one parent object per slice."""

    slice_key = "id"
    data_field = "metafields"
    parent_stream_class: object = None

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        # e.g. `orders/<id>/metafields.json`
        parent_resource = self.parent_stream_class.data_field
        return f"{parent_resource}/{stream_slice[self.slice_key]}/{self.data_field}.json"
class Articles(IncrementalShopifyStream):
    """Shopify `Articles`, synced incrementally by `id` using the `since_id` filter."""

    data_field = "articles"
    cursor_field = "id"
    order_field = "id"
    filter_field = "since_id"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class MetafieldArticles(MetafieldShopifySubstream):
    """Metafields attached to `Articles` records."""

    parent_stream_class: object = Articles
class Blogs(IncrementalShopifyStream):
    """Shopify `Blogs`, synced incrementally by `id` using the `since_id` filter."""

    data_field = "blogs"
    cursor_field = "id"
    order_field = "id"
    filter_field = "since_id"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class MetafieldBlogs(MetafieldShopifySubstream):
    """Metafields attached to `Blogs` records."""

    parent_stream_class: object = Blogs
class Customers(IncrementalShopifyStream):
    """Shopify `Customers`, synced incrementally by `updated_at` (class defaults)."""

    data_field = "customers"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class MetafieldCustomers(MetafieldShopifySubstream):
    """Metafields attached to `Customers` records."""

    parent_stream_class: object = Customers
class Orders(IncrementalShopifyStream):
    """Shopify `Orders`, synced incrementally; requests orders of any status."""

    data_field = "orders"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"

    def request_params(
        self, stream_state: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None, **kwargs
    ) -> MutableMapping[str, Any]:
        params = super().request_params(stream_state=stream_state, next_page_token=next_page_token, **kwargs)
        # a paginated request carries only pagination params; otherwise include all order statuses
        if next_page_token:
            return params
        params["status"] = "any"
        return params
class MetafieldOrders(MetafieldShopifySubstream):
    """Metafields attached to `Orders` records."""

    parent_stream_class: object = Orders
class DraftOrders(IncrementalShopifyStream):
    """Shopify `Draft Orders`, synced incrementally by `updated_at` (class defaults)."""

    data_field = "draft_orders"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class MetafieldDraftOrders(MetafieldShopifySubstream):
    """Metafields attached to `Draft Orders` records."""

    parent_stream_class: object = DraftOrders
class Products(IncrementalShopifyStream):
    """Shopify `Products`; responses are cached because several sub-streams read from this stream."""

    use_cache = True
    data_field = "products"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class ProductsGraphQl(IncrementalShopifyStream):
    """`Products` read via the GraphQL Admin API (POST), paginated by cursor."""

    filter_field = "updatedAt"
    cursor_field = "updatedAt"
    data_field = "graphql"
    http_method = "POST"

    def path(self, **kwargs) -> str:
        return f"{self.data_field}.json"

    def request_params(
        self,
        stream_state: Optional[Mapping[str, Any]] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
        **kwargs,
    ) -> MutableMapping[str, Any]:
        # all arguments are sent in the GraphQL request body, not the query string
        return {}

    def request_body_json(
        self,
        stream_state: Mapping[str, Any],
        stream_slice: Optional[Mapping[str, Any]] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> Optional[Mapping]:
        """Build the GraphQL query payload, resuming from `stream_state` when a cursor value is saved."""
        state_value = stream_state.get(self.filter_field)
        if state_value:
            filter_value = state_value
        else:
            filter_value = self.default_filter_field_value
        query = get_query_products(
            first=self.limit, filter_field=self.filter_field, filter_value=filter_value, next_page_token=next_page_token
        )
        return {"query": query}

    @staticmethod
    def next_page_token(response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Return the GraphQL `endCursor` when `hasNextPage` is set, otherwise None."""
        page_info = response.json()["data"]["products"]["pageInfo"]
        has_next_page = page_info["hasNextPage"]
        if has_next_page:
            return page_info["endCursor"]
        else:
            return None

    @limiter.balance_rate_limit(api_type=ApiTypeEnum.graphql.value)
    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        # BUGFIX: compare with `==`, not `is` — identity comparison against an int
        # relies on CPython's small-int caching and is never guaranteed.
        if response.status_code == requests.codes.OK:
            try:
                json_response = response.json()["data"]["products"]["nodes"]
                yield from self.produce_records(json_response)
            except RequestException as e:
                # BUGFIX: corrected typo in the log message (`parse_ersponse` -> `parse_response`).
                self.logger.warning(f"Unexpected error in `parse_response`: {e}, the actual response data: {response.text}")
                yield {}
class MetafieldProducts(MetafieldShopifySubstream):
    """Metafields attached to `Products` records."""

    parent_stream_class: object = Products
class ProductImages(ShopifySubstream):
    """Images of each `Product`; slices come from the nested `images` list of the parent record."""

    parent_stream_class: object = Products
    cursor_field = "id"
    slice_key = "product_id"
    data_field = "images"
    nested_substream = "images"
    filter_field = "since_id"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"products/{stream_slice[self.slice_key]}/{self.data_field}.json"
class MetafieldProductImages(MetafieldShopifySubstream):
    """Metafields attached to each product image; one slice per nested image id."""

    parent_stream_class: object = Products
    nested_record = "images"
    slice_key = "images"
    nested_substream = "images"
    nested_substream_list_field_id = "id"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"product_images/{stream_slice[self.slice_key]}/{self.data_field}.json"
class ProductVariants(ShopifySubstream):
    """Variants of each `Product`; slices come from the nested `variants` list of the parent record."""

    parent_stream_class: object = Products
    cursor_field = "id"
    slice_key = "product_id"
    data_field = "variants"
    nested_substream = "variants"
    filter_field = "since_id"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"products/{stream_slice[self.slice_key]}/{self.data_field}.json"
class MetafieldProductVariants(MetafieldShopifySubstream):
    """Metafields attached to each product variant; one slice per nested variant id."""

    parent_stream_class: object = Products
    nested_record = "variants"
    slice_key = "variants"
    nested_substream = "variants"
    nested_substream_list_field_id = "id"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"variants/{stream_slice[self.slice_key]}/{self.data_field}.json"
class AbandonedCheckouts(IncrementalShopifyStream):
    """Abandoned `Checkouts`, synced incrementally; requests checkouts of any status."""

    data_field = "checkouts"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"

    def request_params(
        self, stream_state: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None, **kwargs
    ) -> MutableMapping[str, Any]:
        params = super().request_params(stream_state=stream_state, next_page_token=next_page_token, **kwargs)
        # If there is a next page token then we should only send pagination-related parameters.
        if next_page_token:
            return params
        params["status"] = "any"
        return params
class CustomCollections(IncrementalShopifyStream):
    """Shopify `Custom Collections`, synced incrementally by `updated_at` (class defaults)."""

    data_field = "custom_collections"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class SmartCollections(IncrementalShopifyStream):
    """Shopify `Smart Collections`, synced incrementally by `updated_at` (class defaults)."""

    data_field = "smart_collections"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class MetafieldSmartCollections(MetafieldShopifySubstream):
    """Metafields attached to `Smart Collections` records."""

    parent_stream_class: object = SmartCollections
class Collects(IncrementalShopifyStream):
    """
    Collects stream does not support Incremental Refresh based on datetime fields, only `since_id` is supported:
    https://shopify.dev/docs/admin-api/rest/reference/products/collect

    The Collect stream is the link between Products and Collections: when a Collection is created for Products,
    a `collect` record is created, so it is reasonable to Full Refresh all collects. For Incremental refresh,
    `since_id` is used specifically for this stream.
    """

    data_field = "collects"
    cursor_field = "id"
    order_field = "id"
    filter_field = "since_id"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class Collections(ShopifySubstream):
    """Collections resolved one-by-one from the `collection_id` of each `Collects` record."""

    parent_stream_class: object = Collects
    nested_record = "collection_id"
    slice_key = "collection_id"
    data_field = "collection"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"collections/{stream_slice[self.slice_key]}.json"
class MetafieldCollections(MetafieldShopifySubstream):
    """Metafields attached to collections, sliced by the `collection_id` of `Collects` records."""

    parent_stream_class: object = Collects
    slice_key = "collection_id"
    nested_record = "collection_id"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"collections/{stream_slice[self.slice_key]}/{self.data_field}.json"
class BalanceTransactions(IncrementalShopifyStream):
    """
    PaymentsTransactions stream does not support Incremental Refresh based on datetime fields, only `since_id` is supported:
    https://shopify.dev/api/admin-rest/2021-07/resources/transactions
    """

    data_field = "transactions"
    cursor_field = "id"
    order_field = "id"
    filter_field = "since_id"

    def path(self, **kwargs) -> str:
        return f"shopify_payments/balance/{self.data_field}.json"
class OrderRefunds(ShopifySubstream):
    """Refunds of each `Order`; only orders whose nested `refunds` list is non-empty are sliced."""

    parent_stream_class: object = Orders
    slice_key = "order_id"
    data_field = "refunds"
    cursor_field = "created_at"
    # we pull out the records that we already know has the refunds data from Orders object
    nested_substream = "refunds"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"orders/{stream_slice[self.slice_key]}/{self.data_field}.json"
class OrderRisks(ShopifySubstream):
    """Fraud-risk assessments of each `Order`."""

    parent_stream_class: object = Orders
    slice_key = "order_id"
    data_field = "risks"
    cursor_field = "id"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"orders/{stream_slice[self.slice_key]}/{self.data_field}.json"
class Transactions(ShopifySubstream):
    """Payment transactions of each `Order`."""

    parent_stream_class: object = Orders
    slice_key = "order_id"
    data_field = "transactions"
    cursor_field = "created_at"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"orders/{stream_slice[self.slice_key]}/{self.data_field}.json"
class TenderTransactions(IncrementalShopifyStream):
    """Shopify `Tender Transactions`, synced incrementally by `processed_at`."""

    data_field = "tender_transactions"
    cursor_field = "processed_at"
    filter_field = "processed_at_min"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class Pages(IncrementalShopifyStream):
    """Shopify online-store `Pages`, synced incrementally by `updated_at` (class defaults)."""

    data_field = "pages"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class MetafieldPages(MetafieldShopifySubstream):
    """Metafields attached to `Pages` records."""

    parent_stream_class: object = Pages
class PriceRules(IncrementalShopifyStream):
    """Shopify `Price Rules`, synced incrementally by `updated_at` (class defaults)."""

    data_field = "price_rules"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class DiscountCodes(ShopifySubstream):
    """Discount codes belonging to each `PriceRule`."""

    parent_stream_class: object = PriceRules
    slice_key = "price_rule_id"
    data_field = "discount_codes"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"price_rules/{stream_slice[self.slice_key]}/{self.data_field}.json"
class Locations(ShopifyStream):
    """
    The location API does not support any form of filtering.
    https://shopify.dev/api/admin-rest/2021-07/resources/location

    Therefore, only FULL_REFRESH mode is supported.
    """

    data_field = "locations"

    def path(self, **kwargs):
        return self.data_field + ".json"
class MetafieldLocations(MetafieldShopifySubstream):
    """Metafields attached to `Locations` records."""

    parent_stream_class: object = Locations
class InventoryLevels(ShopifySubstream):
    """Inventory levels per `Location`; a surrogate `id` is derived since the API provides none."""

    parent_stream_class: object = Locations
    slice_key = "location_id"
    data_field = "inventory_levels"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"locations/{stream_slice[self.slice_key]}/{self.data_field}.json"

    def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapping]:
        # associate the surrogate key `<location_id>|<inventory_item_id>` with each record
        for record in super().parse_response(response, **kwargs):
            location_id = record.get("location_id", "")
            item_id = record.get("inventory_item_id", "")
            record["id"] = f"{location_id}|{item_id}"
            yield record
class InventoryItems(ShopifySubstream):
    """Inventory items fetched in bulk by the `inventory_item_id`s of each product's variants."""

    parent_stream_class: object = Products
    slice_key = "id"
    nested_record = "variants"
    nested_record_field_name = "inventory_item_id"
    data_field = "inventory_items"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        variant_records = stream_slice[self.slice_key]
        ids = ",".join(str(variant[self.nested_record_field_name]) for variant in variant_records)
        return f"inventory_items.json?ids={ids}"
class FulfillmentOrders(ShopifySubstream):
    """Fulfillment orders of each `Order`."""

    parent_stream_class: object = Orders
    slice_key = "order_id"
    data_field = "fulfillment_orders"
    cursor_field = "id"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"orders/{stream_slice[self.slice_key]}/{self.data_field}.json"
class Fulfillments(ShopifySubstream):
    """Fulfillments of each `Order`."""

    parent_stream_class: object = Orders
    slice_key = "order_id"
    data_field = "fulfillments"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"orders/{stream_slice[self.slice_key]}/{self.data_field}.json"
class Shop(ShopifyStream):
    """The single `Shop` resource; full refresh only (also used as the connection-check stream)."""

    data_field = "shop"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class MetafieldShops(IncrementalShopifyStream):
    """Shop-level metafields, read from the top-level `metafields` endpoint."""

    data_field = "metafields"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class CustomerSavedSearch(IncrementalShopifyStream):
    """Customer saved searches; pinned to API version `2022-01` and synced by `since_id`."""

    api_version = "2022-01"
    cursor_field = "id"
    order_field = "id"
    data_field = "customer_saved_searches"
    filter_field = "since_id"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class CustomerAddress(ShopifySubstream):
    """Addresses of each `Customer`."""

    parent_stream_class: object = Customers
    slice_key = "id"
    data_field = "addresses"
    cursor_field = "id"

    def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str:
        return f"customers/{stream_slice[self.slice_key]}/{self.data_field}.json"
class Countries(ShopifyStream):
    """Shipping `Countries` configured for the shop; full refresh only."""

    data_field = "countries"

    def path(self, **kwargs) -> str:
        return self.data_field + ".json"
class ConnectionCheckTest:
    """Validates connector configuration by reading the `Shop` resource once."""

    def __init__(self, config: Mapping[str, Any]):
        self.config = config
        # use `Shop` as a test stream for connection check
        self.test_stream = Shop(self.config)
        # setting `max_retries` to 0 for the stage of `check connection`,
        # because it keeps retrying for wrong shop names,
        # but it should stop immediately
        self.test_stream.max_retries = 0

    def describe_error(self, pattern: str, shop_name: str = None, details: Any = None, **kwargs) -> str:
        """Return a human-readable error message for a known error `pattern`, or None for unknown patterns."""
        connection_check_errors_map: Mapping[str, Any] = {
            "connection_error": f"Connection could not be established using `Shopify Store`: {shop_name}. Make sure it's valid and try again.",
            # BUGFIX: corrected user-facing typos (`successfull` -> `successful`, `configuation` -> `configuration`)
            "request_exception": f"Request was not successful, check your `input configuration` and try again. Details: {details}",
            "index_error": f"Failed to access the Shopify store `{shop_name}`. Verify the entered Shopify store or API Key in `input configuration`.",
            # add the other patterns and description, if needed...
        }
        return connection_check_errors_map.get(pattern)

    def test_connection(self) -> tuple[bool, str]:
        """Attempt one read; return (True, None) on success or (False, <reason>) on failure."""
        shop_name = self.config.get("shop")
        if not shop_name:
            return False, "The `Shopify Store` name is missing. Make sure it's entered and valid."

        try:
            response = list(self.test_stream.read_records(sync_mode=None))
            # check for the shop_id is present in the response
            shop_id = response[0].get("id")
            if shop_id is not None:
                return True, None
            else:
                return False, f"The `shop_id` is invalid: {shop_id}"
        except (SSLError, ConnectionError):
            return False, self.describe_error("connection_error", shop_name)
        except RequestException as req_error:
            return False, self.describe_error("request_exception", details=req_error)
        except IndexError:
            # `response[0]` failed: the store returned an empty/unexpected payload
            return False, self.describe_error("index_error", shop_name, response)
class SourceShopify(AbstractSource):
    """Airbyte source: declares the Shopify streams and checks connectivity/scopes."""

    def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
        """
        Testing connection availability for the connector.

        BUGFIX: the return annotation previously used the builtin `any` instead of `typing.Any`.
        """
        config["authenticator"] = ShopifyAuthenticator(config)
        return ConnectionCheckTest(config).test_connection()

    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
        """
        Map the user's input configuration (as defined in the connector spec)
        to the list of streams the granted OAuth scopes permit.
        """
        config["authenticator"] = ShopifyAuthenticator(config)
        user_scopes = self.get_user_scopes(config)
        always_permitted_streams = ["MetafieldShops", "Shop", "Countries"]

        permitted_streams = [
            stream
            for user_scope in user_scopes
            if user_scope["handle"] in SCOPES_MAPPING
            for stream in SCOPES_MAPPING.get(user_scope["handle"])
        ] + always_permitted_streams

        # before adding stream to stream_instances list, please add it to SCOPES_MAPPING
        stream_instances = [
            AbandonedCheckouts(config),
            Articles(config),
            BalanceTransactions(config),
            Blogs(config),
            Collections(config),
            Collects(config),
            CustomCollections(config),
            Customers(config),
            DiscountCodes(config),
            DraftOrders(config),
            FulfillmentOrders(config),
            Fulfillments(config),
            InventoryItems(config),
            InventoryLevels(config),
            Locations(config),
            MetafieldArticles(config),
            MetafieldBlogs(config),
            MetafieldCollections(config),
            MetafieldCustomers(config),
            MetafieldDraftOrders(config),
            MetafieldLocations(config),
            MetafieldOrders(config),
            MetafieldPages(config),
            MetafieldProductImages(config),
            MetafieldProducts(config),
            MetafieldProductVariants(config),
            MetafieldShops(config),
            MetafieldSmartCollections(config),
            OrderRefunds(config),
            OrderRisks(config),
            Orders(config),
            Pages(config),
            PriceRules(config),
            ProductImages(config),
            Products(config),
            ProductsGraphQl(config),
            ProductVariants(config),
            Shop(config),
            SmartCollections(config),
            TenderTransactions(config),
            Transactions(config),
            CustomerSavedSearch(config),
            CustomerAddress(config),
            Countries(config),
        ]

        return [stream_instance for stream_instance in stream_instances if self.format_name(stream_instance.name) in permitted_streams]

    @staticmethod
    def get_user_scopes(config):
        """Fetch the OAuth access scopes granted to the configured API credentials.

        Raises connector-specific errors for a wrong shop name, malformed JSON,
        connection failures, or a response without `access_scopes`.
        """
        session = requests.Session()
        url = f"https://{config['shop']}.myshopify.com/admin/oauth/access_scopes.json"
        headers = config["authenticator"].get_auth_header()

        try:
            response = session.get(url, headers=headers).json()
            access_scopes = response.get("access_scopes")
        except InvalidURL:
            raise ShopifyWrongShopNameError(url)
        except JSONDecodeError as json_error:
            raise ShopifyBadJsonError(json_error)
        except (SSLError, ConnectionError) as con_error:
            raise ShopifyConnectionError(con_error)

        if access_scopes:
            return access_scopes
        else:
            raise ShopifyAccessScopesError(response)

    @staticmethod
    def format_name(name):
        """Convert a snake_case stream name to the PascalCase form used in SCOPES_MAPPING."""
        return "".join(x.capitalize() for x in name.split("_"))