1
0
mirror of synced 2025-12-22 03:21:25 -05:00
Files
airbyte/airbyte-integrations/connectors/source-mixpanel/source_mixpanel/manifest.yaml

841 lines
31 KiB
YAML

# Declarative (low-code) manifest for the Mixpanel source connector.
version: "0.80.0"
type: DeclarativeSource

# Reusable components; other parts of this file point at them with
# "#/definitions/<name>" references.
definitions:
# Static schema loader: the JSON file is chosen by the stream's `name` parameter.
schema_loader:
  type: JsonFileSchemaLoader
  file_path: "./source_mixpanel/schemas/{{ parameters['name'] }}.json"
# Project-secret authentication: the base64-encoded api_secret is sent as a
# "Basic ..." value in the Authorization request header.
api_token_auth:
  type: ApiKeyAuthenticator
  api_token: "Basic {{ config['credentials']['api_secret'] | base64encode }}"
  inject_into:
    type: RequestOption
    field_name: Authorization
    inject_into: header
# Service-account authentication using the configured username and secret.
basic_http_authenticator:
  type: BasicHttpAuthenticator
  password: "{{ config['credentials']['secret'] }}"
  username: "{{ config['credentials']['username'] }}"
# Chooses an authenticator from the value found at
# config.credentials.option_title.
authenticator:
  type: SelectiveAuthenticator
  authenticator_selection_path:
    - credentials
    - option_title
  authenticators:
    "Project Secret": "#/definitions/api_token_auth"
    "Service Account": "#/definitions/basic_http_authenticator"
# Shared error handling: up to 5 retries with exponential backoff (factor 30)
# plus one response filter per known failure mode.
default_error_handler:
  type: DefaultErrorHandler
  max_retries: 5
  backoff_strategies:
    - type: ExponentialBackoffStrategy
      factor: 30
  # The list order is kept exactly as originally written.
  response_filters:
    # Bad credentials reported in the response body.
    - action: FAIL
      error_message_contains: "Unable to authenticate request"
      error_message: "Authentication has failed. Please update your config with valid credentials."
    # Rate limit reported in the response body.
    - action: RATE_LIMITED
      error_message_contains: "Query rate limit exceeded"
      error_message: "Query rate limit exceeded."
    # HTTP 500 responses whose body mentions "unknown error" are retried.
    - action: RETRY
      http_codes:
        - 500
      error_message_contains: "unknown error"
      error_message: "An unknown error occurred"
    # The API rejects a to_date that lies in the future for the project.
    - action: FAIL
      error_message_contains: "to_date cannot be later than today"
      error_message: "Your project timezone must be misconfigured. Please set it to the one defined in your Mixpanel project settings."
    # NOTE(review): every HTTP 400 is surfaced with the authentication
    # message - confirm that this is intended for non-auth 400 responses.
    - action: FAIL
      http_codes:
        - 400
      error_message: "Authentication has failed. Please update your config with valid credentials."
    - action: FAIL
      http_codes:
        - 402
      error_message: "Unable to perform a request. Payment Required."
    # Any response carrying a Retry-After header is treated as rate limited.
    - action: RATE_LIMITED
      predicate: "{{ 'Retry-After' in headers }}"
      error_message: "Query rate limit exceeded."
# Base requester for the query API. The request path comes from the stream's
# `path` parameter and project_id is always sent as a query parameter.
requester:
  type: CustomRequester
  class_name: "source_mixpanel.components.MixpanelHttpRequester"
  http_method: GET
  # Host is "mixpanel.com" for region US, otherwise "<region>.mixpanel.com".
  url_base: "https://{{ '' if config.region == 'US' else config.region+'.' }}mixpanel.com/api/"
  path: "{{ parameters['path'] }}"
  request_parameters:
    project_id: "{{ config['credentials']['project_id'] }}"
  authenticator: "#/definitions/authenticator"
  error_handler:
    $ref: "#/definitions/default_error_handler"
# Record selector that reads records from the response key named by the
# stream's `extractor_field_path` parameter.
selector:
  type: RecordSelector
  extractor:
    type: DpathExtractor
    field_path: ["{{ parameters['extractor_field_path'] }}"]
# Record selector with an empty field path (no nested key is addressed).
selector_empty_dpath:
  type: RecordSelector
  extractor:
    field_path: []
    type: DpathExtractor
# Default retriever: shared requester and selector, no partitioning.
retriever:
  type: SimpleRetriever
  partition_router: []
  requester:
    $ref: "#/definitions/requester"
  record_selector:
    $ref: "#/definitions/selector"
# Template that concrete streams extend via $ref; keyed on `id` unless the
# extending stream overrides primary_key.
stream_base:
  type: DeclarativeStream
  primary_key: id
  retriever:
    $ref: "#/definitions/retriever"
  schema_loader:
    $ref: "#/definitions/schema_loader"
# Shared date-based cursor on the `date` field. Slices are sent to the API as
# from_date / to_date query parameters.
incremental_sync:
  type: DatetimeBasedCursor
  cursor_field: date
  datetime_format: "%Y-%m-%d"
  cursor_datetime_formats:
    - "%Y-%m-%d"
    - "%Y-%m-%d %H:%M:%S"
    - "%Y-%m-%dT%H:%M:%S%z"
  cursor_granularity: P1D
  # Slice width in days; config.date_window_size, 30 when unset.
  step: 'P{{ config.get("date_window_size", 30) }}D'
  # Lookback in days; config.attribution_window, 5 when unset.
  lookback_window: 'P{{ config.get("attribution_window", 5) }}D'
  # Starts at config.start_date, or 365 days back when it is not set.
  start_datetime:
    type: MinMaxDatetime
    datetime: "{{ config.start_date or day_delta(-365, format='%Y-%m-%dT%H:%M:%SZ') }}"
    datetime_format: "%Y-%m-%dT%H:%M:%SZ"
  # Ends at config.end_date, or one day back when it is not set.
  end_datetime:
    type: MinMaxDatetime
    datetime: '{{ config.end_date or day_delta(-1, format="%Y-%m-%dT%H:%M:%SZ") }}'
    datetime_format: "%Y-%m-%dT%H:%M:%SZ"
  start_time_option:
    type: RequestOption
    field_name: from_date
    inject_into: request_parameter
  end_time_option:
    type: RequestOption
    field_name: to_date
    inject_into: request_parameter
# Cohorts list.
# API reference: https://developer.mixpanel.com/reference/cohorts
cohorts_stream:
  $ref: "#/definitions/stream_base"
  $parameters:
    name: cohorts
    path: query/cohorts/list
    field_path: []
  retriever:
    $ref: "#/definitions/retriever"
    record_selector:
      $ref: "#/definitions/selector_empty_dpath"
      # Drop cohorts created before the start of the current interval.
      record_filter:
        condition: "{{ record.created >= stream_interval.start_time }}"
  incremental_sync:
    type: DatetimeBasedCursor
    cursor_field: created
    datetime_format: "%Y-%m-%d %H:%M:%S"
    cursor_datetime_formats:
      - "%Y-%m-%dT%H:%M:%S"
      - "%Y-%m-%d %H:%M:%S"
      - "%Y-%m-%dT%H:%M:%SZ"
      - "%Y-%m-%dT%H:%M:%S%z"
    start_datetime:
      type: MinMaxDatetime
      datetime: "{{ config.start_date or day_delta(-365, format='%Y-%m-%dT%H:%M:%SZ') }}"
      datetime_format: "%Y-%m-%dT%H:%M:%SZ"
# Page-number pagination sent as `page` / `page_size` query parameters.
# The page-advance logic lives in the custom EngagePaginationStrategy class.
paginator:
  type: DefaultPaginator
  pagination_strategy:
    type: CustomPaginationStrategy
    class_name: "source_mixpanel.components.EngagePaginationStrategy"
    start_from_page: 1
    # config.page_size, or 1000 when it is missing or falsy.
    page_size: '{{ config["page_size"] or 1000 }}'
  page_size_option:
    type: RequestOption
    field_name: page_size
    inject_into: request_parameter
  page_token_option:
    type: RequestOption
    field_name: page
    inject_into: request_parameter
# User profiles ("engage"), keyed on distinct_id. The schema is built
# dynamically from the engage/properties endpoint.
# API reference: https://developer.mixpanel.com/reference/engage
engage_stream:
  $ref: "#/definitions/stream_base"
  primary_key: distinct_id
  $parameters:
    name: engage
    extractor_field_path: results
  retriever:
    type: SimpleRetriever
    requester:
      type: CustomRequester
      class_name: "source_mixpanel.components.EngagesHttpRequester"
      url_base: "https://{{ '' if config.region == 'US' else config.region+'.' }}mixpanel.com/api/"
      path: query/engage
      authenticator: "#/definitions/authenticator"
      error_handler:
        $ref: "#/definitions/default_error_handler"
    paginator:
      $ref: "#/definitions/paginator"
    record_selector:
      $ref: "#/definitions/selector"
      # Keep only profiles whose $last_seen is at or after the interval start.
      record_filter:
        condition: "{{ record['$properties']['$last_seen'] >= stream_interval.start_time }}"
  incremental_sync:
    type: DatetimeBasedCursor
    cursor_field: last_seen
    datetime_format: "%Y-%m-%dT%H:%M:%S"
    cursor_datetime_formats:
      - "%Y-%m-%dT%H:%M:%S"
      - "%Y-%m-%dT%H:%M:%S%z"
    start_datetime:
      type: MinMaxDatetime
      datetime: "{{ config.start_date or day_delta(-365, format='%Y-%m-%dT%H:%M:%SZ') }}"
      datetime_format: "%Y-%m-%dT%H:%M:%SZ"
  transformations:
    - class_name: "source_mixpanel.components.EngageTransformation"
    # browser_version is emitted as a string; an empty string when it is
    # missing or falsy.
    - type: AddFields
      fields:
        - path: [browser_version]
          value: "{{ record.browser_version | string if record.browser_version else '' }}"
  schema_loader:
    type: DynamicSchemaLoader
    retriever:
      type: SimpleRetriever
      requester:
        type: CustomRequester
        class_name: "source_mixpanel.components.EngagesHttpRequester"
        url_base: "https://{{ '' if config.region == 'US' else config.region+'.' }}mixpanel.com/api/query/"
        path: engage/properties
        authenticator: "#/definitions/authenticator"
        error_handler:
          $ref: "#/definitions/default_error_handler"
      record_selector:
        type: RecordSelector
        extractor:
          class_name: "source_mixpanel.components.EngagePropertiesDpathExtractor"
          field_path: [results]
    schema_transformations:
      # Remove the "$" character from property names.
      - type: KeysReplace
        old: "$"
        new: ""
      # Fixed set of profile fields added to the discovered schema.
      - type: AddFields
        fields:
          - path: ["distinct_id"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["browser"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["browser_version"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["city"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["country_code"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["region"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["timezone"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["last_seen"]
            value: "{{ {'type': ['null', 'string'], 'format': 'date-time'} }}"
          - path: ["email"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["name"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["first_name"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["last_name"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["id"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["unblocked"]
            value: "{{ {'type': ['null', 'string']} }}"
    schema_type_identifier:
      type: SchemaTypeIdentifier
      schema_pointer: []
      key_pointer: [name]
      type_pointer: [type]
      types_mapping:
        # datetime maps to a plain string with no format, because values can
        # look like "2021-12-16T00:00:00", "1638298874" or "15/08/53895".
        - type: TypesMap
          current_type: datetime
          target_type: string
        - type: TypesMap
          current_type: list
          target_type: array
        - type: TypesMap
          current_type: unknown
          target_type: string
# Members of each cohort: extends engage_stream, POSTs to query/engage once per
# parent cohort and tags every record with the cohort id.
cohort_members_stream:
  $ref: "#/definitions/engage_stream"
  primary_key: [distinct_id, cohort_id]
  $parameters:
    name: cohort_members
    path: query/engage
    extractor_field_path: results
  retriever:
    $ref: "#/definitions/retriever"
    requester:
      $ref: "#/definitions/requester"
      http_method: POST
    paginator:
      $ref: "#/definitions/paginator"
    record_selector:
      $ref: "#/definitions/selector"
      # Keep only profiles whose $last_seen is at or after the interval start.
      record_filter:
        condition: "{{ record['$properties']['$last_seen'] >= stream_interval.start_time }}"
    # One partition per cohort; the parent `id` is sent in the JSON body under
    # `filter_by_cohort`.
    partition_router:
      class_name: "source_mixpanel.components.CohortMembersSubstreamPartitionRouter"
      parent_stream_configs:
        - type: ParentStreamConfig
          stream: "#/definitions/cohorts_stream"
          parent_key: id
          partition_field: id
          request_option:
            type: RequestOption
            field_name: filter_by_cohort
            inject_into: body_json
  transformations:
    - class_name: "source_mixpanel.components.EngageTransformation"
    # cohort_id is taken from the current partition.
    - type: AddFields
      fields:
        - path: [cohort_id]
          value: "{{ stream_partition.get('id') }}"
    # browser_version is emitted as a string; an empty string when it is
    # missing or falsy.
    - type: AddFields
      fields:
        - path: [browser_version]
          value: "{{ record.browser_version | string if record.browser_version else '' }}"
  schema_loader:
    $ref: "#/definitions/engage_stream/schema_loader"
    schema_transformations:
      # Remove the "$" character from property names.
      - type: KeysReplace
        old: "$"
        new: ""
      # Fixed profile fields plus cohort_id, added to the discovered schema.
      - type: AddFields
        fields:
          - path: ["cohort_id"]
            value: "{{ {'type': ['null', 'integer']} }}"
          - path: ["distinct_id"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["browser"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["browser_version"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["city"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["country_code"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["region"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["timezone"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["last_seen"]
            value: "{{ {'type': ['null', 'string'], 'format': 'date-time'} }}"
          - path: ["email"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["name"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["first_name"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["last_name"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["id"]
            value: "{{ {'type': ['null', 'string']} }}"
          - path: ["unblocked"]
            value: "{{ {'type': ['null', 'string']} }}"
# Revenue, keyed on `date`.
# There is no public API documentation; this was built from the Singer source.
revenue_stream:
  $ref: "#/definitions/stream_base"
  primary_key: date
  $parameters:
    name: revenue
    path: query/engage/revenue
    extractor_field_path: results
  retriever:
    $ref: "#/definitions/retriever"
    record_selector:
      $ref: "#/definitions/selector"
      # Custom extractor replaces the default one from the shared selector.
      extractor:
        class_name: "source_mixpanel.components.RevenueDpathExtractor"
        field_path: ["{{ parameters['extractor_field_path'] }}"]
  incremental_sync: "#/definitions/incremental_sync"
# Project annotations. Both the request path and the response key depend on
# whether config.credentials.project_id is set.
# API reference: https://developer.mixpanel.com/reference/list-all-annotations-for-project
annotations_stream:
  $ref: "#/definitions/stream_base"
  primary_key: id
  $parameters:
    name: annotations
    extractor_field_path: results
    path: annotations
  retriever:
    $ref: "#/definitions/retriever"
    requester:
      type: CustomRequester
      class_name: "source_mixpanel.components.AnnotationsHttpRequester"
      url_base: "https://{{ '' if config.region == 'US' else config.region+'.' }}mixpanel.com/api/"
      # With a project id: app/projects/<project_id>/annotations,
      # without one:       query/annotations
      path: |
        {% set project_id = config.credentials.project_id %}
        {% if project_id %}app/projects/{{project_id}}{% else %}query{% endif %}/annotations
      authenticator: "#/definitions/authenticator"
      error_handler:
        $ref: "#/definitions/default_error_handler"
    record_selector:
      type: RecordSelector
      extractor:
        type: DpathExtractor
        # Records are read from "results" when a project id is set, otherwise
        # from "annotations".
        field_path: ["{{ 'results' if config.credentials.project_id else 'annotations' }}"]
# Funnel list; referenced by funnels_stream as its parent stream.
# API reference: https://developer.mixpanel.com/reference/funnels-query
funnel_ids_stream:
  type: DeclarativeStream
  name: funnel_ids
  primary_key: [funnel_id]
  retriever:
    type: SimpleRetriever
    requester:
      $ref: "#/definitions/requester"
      http_method: GET
      path: query/funnels/list
      request_parameters:
        project_id: "{{ config['credentials']['project_id'] }}"
    record_selector:
      type: RecordSelector
      extractor:
        type: DpathExtractor
        field_path: []
  transformations:
    # funnel_id is rewritten as a string.
    - type: AddFields
      fields:
        - path: [funnel_id]
          value: "{{ record.funnel_id | string }}"
# Funnel data, one partition per funnel id coming from funnel_ids_stream;
# keyed on (funnel_id, date).
# API reference: https://developer.mixpanel.com/reference/funnels-query
funnels_stream:
  type: DeclarativeStream
  name: funnels
  $parameters:
    name: funnels
  primary_key: [funnel_id, date]
  state_migrations:
    - type: LegacyToPerPartitionStateMigration
  retriever:
    type: SimpleRetriever
    requester:
      type: CustomRequester
      class_name: "source_mixpanel.components.FunnelsHttpRequester"
      url_base: "https://{{ '' if config.region == 'US' else config.region+'.' }}mixpanel.com/api/"
      path: query/funnels
      authenticator: "#/definitions/authenticator"
      error_handler:
        $ref: "#/definitions/default_error_handler"
    record_selector:
      type: RecordSelector
      extractor:
        class_name: "source_mixpanel.components.FunnelsDpathExtractor"
        field_path: [data]
    # The parent funnel_id is sent as the `funnel_id` query parameter.
    partition_router:
      type: CustomPartitionRouter
      class_name: "source_mixpanel.components.FunnelsSubstreamPartitionRouter"
      parent_stream_configs:
        - type: ParentStreamConfig
          stream: "#/definitions/funnel_ids_stream"
          parent_key: funnel_id
          partition_field: funnel_id
          request_option:
            type: RequestOption
            field_name: funnel_id
            inject_into: request_parameter
  incremental_sync: "#/definitions/incremental_sync"
  schema_loader:
    $ref: "#/definitions/schema_loader"
  transformations:
    # funnel_id comes from the partition, name from the slice's funnel_name.
    - type: AddFields
      fields:
        - path: [funnel_id]
          value: "{{ stream_partition.get('funnel_id') }}"
    - type: AddFields
      fields:
        - path: [name]
          value: "{{ stream_slice.get('funnel_name') }}"
# Record transformations shared by both export stream variants.
export_transformations:
  # Custom transformation, configured to work on the "properties" field.
  - type: CustomTransformation
    class_name: "source_mixpanel.components.PropertiesTransformation"
    properties_field: "properties"
  # Flatten "properties" and delete the original nested value.
  - type: DpathFlattenFields
    field_path: [properties]
    delete_origin_value: true
  # Re-format `time` as %Y-%m-%dT%H:%M:%SZ.
  - type: AddFields
    fields:
      - path: [time]
        value: "{{ format_datetime(record['time'], '%Y-%m-%dT%H:%M:%SZ') }}"
# Dynamic schema for the export stream: properties are discovered from
# events/properties/top, then a fixed list of fields is added for any name the
# discovered schema does not already contain.
export_schema_loader:
  type: DynamicSchemaLoader
  retriever:
    type: SimpleRetriever
    requester:
      type: CustomRequester
      class_name: "source_mixpanel.components.MixpanelHttpRequester"
      # NOTE(review): this url_base has no trailing slash, unlike the other
      # url_base values in this file - confirm base and path join correctly.
      url_base: "https://{{ '' if config.region == 'US' else config.region+'.' }}mixpanel.com/api/query"
      path: events/properties/top
      http_method: GET
      authenticator: "#/definitions/authenticator"
      error_handler:
        $ref: "#/definitions/default_error_handler"
    record_selector:
      type: RecordSelector
      extractor:
        type: DpathExtractor
        field_path: []
  schema_transformations:
    - type: CustomTransformation
      class_name: "source_mixpanel.components.PropertiesTransformation"
    - type: AddFields
      # This runs AFTER the schema has been read. A field listed below is
      # ignored when the schema already contains it. Every field is top level,
      # so checking path[0] is sufficient.
      condition: "{{ path[0] not in record.keys() }}"
      fields:
        - path: ["event"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["distinct_id"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["insert_id"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["time"]
          value: "{{ {'type': ['null', 'string'], 'format': 'date-time'} }}"
        - path: ["browser"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["created"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["email"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["first_name"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["last_name"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["initial_referrer"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["os"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Abandon Cart Count"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Account Created Count"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Add To Cart Count"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Affiliate"]
          value: "{{ {'type': ['null', 'string']} }}"
        # NOTE(review): typed as string while the other "... Count" fields in
        # this list are integer - confirm against real data.
        - path: ["Browse Count"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Browse Filter"]
          value: "{{ {'type': ['null', 'array'], 'items': {'type': ['null', 'array']}} }}"
        - path: ["Campaign Name"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Campaign Source"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Card Type"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Cart Items"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Cart Size"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Cart Size (# of Items)"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Cart Value"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Complete Purchase Count"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Coupon"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Coupon Count Used"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Date of Last Item Detail View"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Delivery Day"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Delivery Fee"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Delivery Fees"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Delivery Method"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Delivery Method Added Count"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Gender"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Item Category"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Item Cost"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Item Detail Page Count"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Item Name"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Item Rating"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Items in Browse"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Landing Page Loaded Count"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Last Cart Abandonment"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Last Event"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Last Purchase"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Last Search"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Marketing A/B Test"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Misc Fee"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Misc Fees"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Number of Cards Added"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Number of Cart Abandons"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Number of Item Details Viewed"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Number of Purchases"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Number of Searches"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Page Version"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Payment Method Added Count"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Platform"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Registration Date"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Registration Method"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Review Payment Count"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Search Count"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Search Page"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Search Results Count"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["Search Term"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Suggested Item"]
          value: "{{ {'type': ['null', 'boolean']} }}"
        - path: ["Total Charge"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["UTM_Medium"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["UTM_Term"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["UTM_source"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Within Checkout Process"]
          value: "{{ {'type': ['null', 'boolean']} }}"
        - path: ["mp_lib"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["labels"]
          value: "{{ {'type': ['null', 'array'], 'items': {'type': ['null', 'string']}} }}"
        - path: ["sampling_factor"]
          value: "{{ {'type': ['null', 'integer']} }}"
        - path: ["dataset"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["Referred by"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["import"]
          value: "{{ {'type': ['null', 'boolean']} }}"
        - path: ["URL"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["mp_api_timestamp_ms"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["mp_api_endpoint"]
          value: "{{ {'type': ['null', 'string']} }}"
        - path: ["mp_processing_time_ms"]
          value: "{{ {'type': ['null', 'string']} }}"
  schema_type_identifier:
    type: SchemaTypeIdentifier
    schema_pointer: []
    key_pointer: []
    type_pointer: []
# Raw event export, incremental variant (referenced by export_stream as its
# incremental_stream). Cursor on `time`; no step, lookback or end date is set.
# API reference: https://developer.mixpanel.com/reference/raw-event-export
export_incremental_stream:
  type: DeclarativeStream
  name: export
  $parameters:
    name: export
  retriever:
    type: SimpleRetriever
    requester:
      type: CustomRequester
      class_name: "source_mixpanel.components.ExportHttpRequester"
      http_method: GET
      # Export uses the data host: data-eu.mixpanel.com for region EU,
      # data.mixpanel.com otherwise.
      url_base: "https://data{{ '-eu' if config.region == 'EU' else '' }}.mixpanel.com/api/2.0/"
      path: export
      authenticator: "#/definitions/authenticator"
      # Custom handler with 10 retries; backoff and filters are reused from
      # the default error handler.
      error_handler:
        type: CustomErrorHandler
        class_name: "source_mixpanel.components.ExportErrorHandler"
        max_retries: 10
        backoff_strategies:
          $ref: "#/definitions/default_error_handler/backoff_strategies"
        response_filters:
          $ref: "#/definitions/default_error_handler/response_filters"
    record_selector:
      type: RecordSelector
      schema_normalization: Default
      extractor:
        class_name: "source_mixpanel.components.ExportDpathExtractor"
        field_path: []
  incremental_sync:
    type: DatetimeBasedCursor
    cursor_field: time
    datetime_format: "%Y-%m-%dT%H:%M:%SZ"
    cursor_datetime_formats:
      - "%Y-%m-%dT%H:%M:%S%z"
      - "%Y-%m-%d %H:%M:%S"
      - "%Y-%m-%d"
    start_datetime:
      type: MinMaxDatetime
      datetime: "{{ config.start_date or day_delta(-365, format='%Y-%m-%dT%H:%M:%SZ') }}"
      datetime_format: "%Y-%m-%dT%H:%M:%SZ"
  transformations:
    $ref: "#/definitions/export_transformations"
  schema_loader:
    $ref: "#/definitions/export_schema_loader"
# Raw event export, full-refresh variant (referenced by export_stream as its
# full_refresh_stream). Reads date-windowed slices sent to the API as
# from_date / to_date query parameters.
# API reference: https://developer.mixpanel.com/reference/raw-event-export
export_full_refresh_stream:
  type: DeclarativeStream
  name: export
  $parameters:
    name: export
  retriever:
    type: SimpleRetriever
    requester:
      type: CustomRequester
      class_name: "source_mixpanel.components.MixpanelHttpRequester"
      path: export
      http_method: GET
      # Export uses the data host: data-eu.mixpanel.com for region EU,
      # data.mixpanel.com otherwise.
      url_base: "https://data{{ '-eu' if config.region == 'EU' else '' }}.mixpanel.com/api/2.0/"
      authenticator: "#/definitions/authenticator"
      # Custom handler with 10 retries; backoff and filters are reused from
      # the default error handler.
      error_handler:
        type: CustomErrorHandler
        class_name: "source_mixpanel.components.ExportErrorHandler"
        max_retries: 10
        backoff_strategies:
          $ref: "#/definitions/default_error_handler/backoff_strategies"
        response_filters:
          $ref: "#/definitions/default_error_handler/response_filters"
    record_selector:
      type: RecordSelector
      schema_normalization: Default
      extractor:
        class_name: "source_mixpanel.components.ExportDpathExtractor"
        field_path: []
  incremental_sync:
    type: DatetimeBasedCursor
    cursor_field: time
    # Slice width in days; config.date_window_size, 30 when unset.
    step: "P{{ config.get('date_window_size', 30) }}D"
    cursor_granularity: P1D
    # Lookback in seconds: the larger of attribution_window (days, converted
    # with 24 * 60 * 60 seconds per day) and export_lookback_window.
    # The previous factor of 20 * 60 * 60 made each day count as 20 hours.
    lookback_window: "PT{{ max(config.get('attribution_window', 5) * 24 * 60 * 60, config.get('export_lookback_window', 0)) }}S"
    cursor_datetime_formats:
      - "%Y-%m-%dT%H:%M:%S%z"
      - "%Y-%m-%d %H:%M:%S"
      - "%Y-%m-%d"
    datetime_format: "%Y-%m-%dT%H:%M:%SZ"
    # config.start_date when set, otherwise 365 days back.
    start_datetime:
      type: MinMaxDatetime
      datetime: "{{ format_datetime(config.start_date, '%Y-%m-%dT%H:%M:%SZ') if config.get('start_date') else day_delta(-365, format='%Y-%m-%dT%H:%M:%SZ') }}"
      datetime_format: "%Y-%m-%dT%H:%M:%SZ"
    # config.end_date when set, otherwise today in config.project_timezone
    # (US/Pacific when that is unset).
    end_datetime:
      type: MinMaxDatetime
      datetime: "{{ format_datetime(config.end_date, '%Y-%m-%dT%H:%M:%SZ') if config.get('end_date') else today_with_timezone(config.get('project_timezone', 'US/Pacific')).strftime('%Y-%m-%dT%H:%M:%SZ') }}"
      datetime_format: "%Y-%m-%dT%H:%M:%SZ"
    start_time_option:
      type: RequestOption
      field_name: from_date
      inject_into: request_parameter
    end_time_option:
      type: RequestOption
      field_name: to_date
      inject_into: request_parameter
  transformations:
    $ref: "#/definitions/export_transformations"
  schema_loader:
    $ref: "#/definitions/export_schema_loader"
# Export stream wrapper that references the full-refresh and incremental
# variants.
export_stream:
  type: StateDelegatingStream
  $parameters:
    name: export
  incremental_stream: "#/definitions/export_incremental_stream"
  full_refresh_stream: "#/definitions/export_full_refresh_stream"
# Streams exposed by this manifest.
streams:
  - "#/definitions/cohorts_stream"
  - "#/definitions/engage_stream"
  - "#/definitions/revenue_stream"
  - "#/definitions/annotations_stream"
  - "#/definitions/cohort_members_stream"
  - "#/definitions/funnels_stream"
  # TODO: the export stream still uses the old Python implementation, so it is
  # not listed here. For context see
  # https://github.com/airbytehq/airbyte-internal-issues/issues/13587
  # - "#/definitions/export_stream"
# Connection check is performed against the cohorts stream.
check:
  type: CheckStream
  stream_names: [cohorts]
# Request budget. The Query API allows 60 queries per hour and at most
# 5 concurrent queries:
# https://developer.mixpanel.com/reference/raw-event-export#api-export-endpoint-rate-limits
api_budget:
  type: HTTPAPIBudget
  policies:
    - type: MovingWindowCallRatePolicy
      matchers: []
      rates:
        # One call per 60 seconds, i.e. 60 queries per hour.
        - interval: PT60S
          limit: 1
# Concurrent streams: cohorts, engage, revenue, annotations, cohort_members,
# funnels. Synchronous streams: export.
# Mixpanel allows at most 5 concurrent queries and 60 queries per hour:
# https://developer.mixpanel.com/reference/rate-limits
# NOTE(review): max_concurrency is 25, above the 5-concurrent-query limit
# quoted here - confirm this is intended.
concurrency_level:
  type: ConcurrencyLevel
  # config.num_workers, 3 when unset.
  default_concurrency: "{{ config.get('num_workers', 3) }}"
  max_concurrency: 25