1
0
mirror of https://github.com/airbytehq/airbyte synced 2025-12-25 02:09:19 -05:00

chore(source-the-guardian-api): format manifest (#58596)

This commit is contained in:
Christo Grabowski
2025-04-22 14:32:47 -07:00
committed by GitHub
parent 238aaf2370
commit 71d6ac8664
3 changed files with 199 additions and 344 deletions

View File

@@ -1,376 +1,230 @@
version: "4.3.2"
definitions:
selector:
extractor:
field_path:
- response
- results
requester:
url_base: "https://content.guardianapis.com"
http_method: "GET"
request_parameters:
api-key: "{{ config['api_key'] }}"
q: "{{ config['query'] }}"
tag: "{{ config['tag'] }}"
section: "{{ config['section'] }}"
order-by: "oldest"
incremental_sync:
type: DatetimeBasedCursor
start_datetime:
datetime: "{{ config['start_date'] }}"
datetime_format: "%Y-%m-%d"
end_datetime:
datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}"
datetime_format: "%Y-%m-%d"
step: "P7D"
datetime_format: "%Y-%m-%dT%H:%M:%SZ"
cursor_granularity: "PT1S"
cursor_field: "webPublicationDate"
start_time_option:
field_name: "from-date"
inject_into: "request_parameter"
end_time_option:
field_name: "to-date"
inject_into: "request_parameter"
retriever:
record_selector:
extractor:
field_path:
- response
- results
paginator:
type: DefaultPaginator
pagination_strategy:
type: CustomPaginationStrategy
class_name: "source_declarative_manifest.components.CustomPageIncrement"
page_size: 10
page_token_option:
type: RequestOption
inject_into: "request_parameter"
field_name: "page"
page_size_option:
inject_into: "body_data"
field_name: "page_size"
requester:
url_base: "https://content.guardianapis.com"
http_method: "GET"
request_parameters:
api-key: "{{ config['api_key'] }}"
q: "{{ config['query'] }}"
tag: "{{ config['tag'] }}"
section: "{{ config['section'] }}"
order-by: "oldest"
base_stream:
incremental_sync:
type: DatetimeBasedCursor
start_datetime:
datetime: "{{ config['start_date'] }}"
datetime_format: "%Y-%m-%d"
end_datetime:
datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}"
datetime_format: "%Y-%m-%d"
step: "P7D"
datetime_format: "%Y-%m-%dT%H:%M:%SZ"
cursor_granularity: "PT1S"
cursor_field: "webPublicationDate"
start_time_option:
field_name: "from-date"
inject_into: "request_parameter"
end_time_option:
field_name: "to-date"
inject_into: "request_parameter"
retriever:
record_selector:
extractor:
field_path:
- response
- results
paginator:
type: DefaultPaginator
pagination_strategy:
type: CustomPaginationStrategy
class_name: "source_declarative_manifest.components.CustomPageIncrement"
page_size: 10
page_token_option:
type: RequestOption
inject_into: "request_parameter"
field_name: "page"
page_size_option:
inject_into: "body_data"
field_name: "page_size"
requester:
url_base: "https://content.guardianapis.com"
http_method: "GET"
request_parameters:
api-key: "{{ config['api_key'] }}"
q: "{{ config['query'] }}"
tag: "{{ config['tag'] }}"
section: "{{ config['section'] }}"
order-by: "oldest"
content_stream:
incremental_sync:
type: DatetimeBasedCursor
start_datetime:
datetime: "{{ config['start_date'] }}"
datetime_format: "%Y-%m-%d"
end_datetime:
datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}"
datetime_format: "%Y-%m-%d"
step: "P7D"
datetime_format: "%Y-%m-%dT%H:%M:%SZ"
cursor_granularity: "PT1S"
cursor_field: "webPublicationDate"
start_time_option:
field_name: "from-date"
inject_into: "request_parameter"
end_time_option:
field_name: "to-date"
inject_into: "request_parameter"
retriever:
record_selector:
extractor:
field_path:
- response
- results
paginator:
type: "DefaultPaginator"
pagination_strategy:
type: CustomPaginationStrategy
class_name: "source_declarative_manifest.components.CustomPageIncrement"
page_size: 10
page_token_option:
type: RequestOption
inject_into: "request_parameter"
field_name: "page"
page_size_option:
inject_into: "body_data"
field_name: "page_size"
requester:
url_base: "https://content.guardianapis.com"
http_method: "GET"
request_parameters:
api-key: "{{ config['api_key'] }}"
q: "{{ config['query'] }}"
tag: "{{ config['tag'] }}"
section: "{{ config['section'] }}"
order-by: "oldest"
schema_loader:
type: InlineSchemaLoader
schema:
$schema: http://json-schema.org/draft-04/schema#
type: object
properties:
id:
type: string
type:
type: string
sectionId:
type: string
sectionName:
type: string
webPublicationDate:
type: string
webTitle:
type: string
webUrl:
type: string
apiUrl:
type: string
isHosted:
type: boolean
pillarId:
type: string
pillarName:
type: string
required:
- id
- type
- sectionId
- sectionName
- webPublicationDate
- webTitle
- webUrl
- apiUrl
- isHosted
- pillarId
- pillarName
streams:
- incremental_sync:
type: DatetimeBasedCursor
start_datetime:
datetime: "{{ config['start_date'] }}"
datetime_format: "%Y-%m-%d"
type: MinMaxDatetime
end_datetime:
datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}"
datetime_format: "%Y-%m-%d"
type: MinMaxDatetime
step: "P7D"
datetime_format: "%Y-%m-%dT%H:%M:%SZ"
cursor_granularity: "PT1S"
cursor_field: "webPublicationDate"
start_time_option:
field_name: "from-date"
inject_into: "request_parameter"
type: RequestOption
end_time_option:
field_name: "to-date"
inject_into: "request_parameter"
type: RequestOption
retriever:
record_selector:
extractor:
field_path:
- response
- results
type: DpathExtractor
type: RecordSelector
paginator:
type: "DefaultPaginator"
pagination_strategy:
class_name: source_declarative_manifest.components.CustomPageIncrement
page_size: 10
type: CustomPaginationStrategy
page_token_option:
type: RequestOption
inject_into: "request_parameter"
field_name: "page"
page_size_option:
inject_into: "body_data"
field_name: "page_size"
type: RequestOption
requester:
url_base: "https://content.guardianapis.com"
http_method: "GET"
request_parameters:
api-key: "{{ config['api_key'] }}"
q: "{{ config['query'] }}"
tag: "{{ config['tag'] }}"
section: "{{ config['section'] }}"
order-by: "oldest"
type: HttpRequester
path: "/search"
type: SimpleRetriever
schema_loader:
type: InlineSchemaLoader
schema:
$schema: http://json-schema.org/draft-04/schema#
type: object
properties:
id:
type: string
type:
type: string
sectionId:
type: string
sectionName:
type: string
webPublicationDate:
type: string
webTitle:
type: string
webUrl:
type: string
apiUrl:
type: string
isHosted:
type: boolean
pillarId:
type: string
pillarName:
type: string
required:
- id
- type
- sectionId
- sectionName
- webPublicationDate
- webTitle
- webUrl
- apiUrl
- isHosted
- pillarId
- pillarName
type: DeclarativeStream
name: "content"
primary_key: "id"
check:
stream_names:
- "content"
type: CheckStream
# Declarative source manifest header for The Guardian content API connector.
version: 6.44.0
type: DeclarativeSource
# Folded scalar: the three lines below form one description string.
description: >-
  Website: https://open-platform.theguardian.com/
  API Reference:
  https://open-platform.theguardian.com/documentation/
# The connection check is performed against the `content` stream.
check:
  type: CheckStream
  stream_names:
    - content
# Reusable component definitions. The top-level `streams` list at the end of
# this block points at `definitions.streams.content` through a $ref.
definitions:
  streams:
    # The single stream of this source: Guardian content search results.
    content:
      type: DeclarativeStream
      name: content
      primary_key:
        - id
      retriever:
        type: SimpleRetriever
        requester:
          # Pulls `type` and `url_base` from base_requester (defined below).
          $ref: "#/definitions/base_requester"
          path: /search
          http_method: GET
          # Filters and credentials come from the user-supplied config.
          request_parameters:
            q: "{{ config['query'] }}"
            tag: "{{ config['tag'] }}"
            api-key: "{{ config['api_key'] }}"
            section: "{{ config['section'] }}"
            # presumably oldest-first so records arrive in ascending
            # webPublicationDate order for the cursor -- confirm
            order-by: oldest
        record_selector:
          type: RecordSelector
          extractor:
            # Records are read from the `response.results` path of the payload.
            type: DpathExtractor
            field_path:
              - response
              - results
        paginator:
          type: DefaultPaginator
          # NOTE(review): the page size is injected as body field `page_size`
          # although the requester uses GET; the Guardian content API documents
          # `page-size` as a query parameter. Confirm whether this option has
          # any effect before relying on it.
          page_size_option:
            type: RequestOption
            field_name: page_size
            inject_into: body_data
          # The page number is sent as the `page` query parameter.
          page_token_option:
            type: RequestOption
            field_name: page
            inject_into: request_parameter
          # Paging logic lives in a custom Python component, not in this file.
          pagination_strategy:
            type: CustomPaginationStrategy
            page_size: 10
            class_name: source_declarative_manifest.components.CustomPageIncrement
        decoder:
          type: JsonDecoder
      # Incremental sync on `webPublicationDate`: the range from
      # config.start_date to config.end_date (or today's UTC date when no
      # end_date is set) is walked in 7-day steps (P7D), and each window is sent
      # as the `from-date` / `to-date` query parameters.
      incremental_sync:
        type: DatetimeBasedCursor
        cursor_field: webPublicationDate
        cursor_datetime_formats:
          - "%Y-%m-%dT%H:%M:%SZ"
        datetime_format: "%Y-%m-%dT%H:%M:%SZ"
        start_datetime:
          type: MinMaxDatetime
          datetime: "{{ config['start_date'] }}"
          datetime_format: "%Y-%m-%d"
        start_time_option:
          type: RequestOption
          field_name: from-date
          inject_into: request_parameter
        end_time_option:
          type: RequestOption
          field_name: to-date
          inject_into: request_parameter
        end_datetime:
          type: MinMaxDatetime
          datetime: "{{ config['end_date'] or now_utc().strftime('%Y-%m-%d') }}"
          datetime_format: "%Y-%m-%d"
        step: P7D
        cursor_granularity: PT1S
      schema_loader:
        type: InlineSchemaLoader
        schema:
          # Resolved against the top-level `schemas` mapping of the manifest.
          $ref: "#/schemas/content"
  # Shared HTTP settings referenced by the stream's requester.
  base_requester:
    type: HttpRequester
    url_base: https://content.guardianapis.com
streams:
  - $ref: "#/definitions/streams/content"
spec:
type: Spec
documentation_url: https://docs.airbyte.com/integrations/sources/the-guardian-api
connection_specification:
$schema: http://json-schema.org/draft-07/schema#
title: The Guardian Api Spec
type: object
$schema: http://json-schema.org/draft-07/schema#
required:
- api_key
- start_date
additionalProperties: true
properties:
api_key:
title: API Key
tag:
type: string
description:
Your API Key. See <a href="https://open-platform.theguardian.com/access/">here</a>.
The key is case sensitive.
airbyte_secret: true
start_date:
title: Start Date
type: string
description:
Use this to set the minimum date (YYYY-MM-DD) of the results.
Results older than the start_date will not be shown.
pattern: ^([1-9][0-9]{3})\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01])$
description: >-
(Optional) A tag is a piece of data that is used by The Guardian to
categorise content. Use this parameter to filter results by showing
only the ones matching the entered tag. See <a
href="https://content.guardianapis.com/tags?api-key=test">here</a> for
a list of all tags, and <a
href="https://open-platform.theguardian.com/documentation/tag">here</a>
for the tags endpoint documentation.
title: Tag
examples:
- YYYY-MM-DD
- environment/recycling
- environment/plasticbags
- environment/energyefficiency
order: 0
query:
title: Query
type: string
description:
(Optional) The query (q) parameter filters the results to only
those that include that search term. The q parameter supports AND, OR and
description: >-
(Optional) The query (q) parameter filters the results to only those
that include that search term. The q parameter supports AND, OR and
NOT operators.
title: Query
examples:
- environment AND NOT water
- environment AND political
- amusement park
- political
tag:
title: Tag
order: 1
api_key:
type: string
description:
(Optional) A tag is a piece of data that is used by The Guardian
to categorise content. Use this parameter to filter results by showing only
the ones matching the entered tag. See <a href="https://content.guardianapis.com/tags?api-key=test">here</a>
for a list of all tags, and <a href="https://open-platform.theguardian.com/documentation/tag">here</a>
for the tags endpoint documentation.
examples:
- environment/recycling
- environment/plasticbags
- environment/energyefficiency
description: >-
Your API Key. See <a
href="https://open-platform.theguardian.com/access/">here</a>. The key
is case sensitive.
title: API Key
airbyte_secret: true
order: 2
section:
title: Section
type: string
description:
(Optional) Use this to filter the results by a particular section.
See <a href="https://content.guardianapis.com/sections?api-key=test">here</a>
for a list of all sections, and <a href="https://open-platform.theguardian.com/documentation/section">here</a>
description: >-
(Optional) Use this to filter the results by a particular section. See
<a
href="https://content.guardianapis.com/sections?api-key=test">here</a>
for a list of all sections, and <a
href="https://open-platform.theguardian.com/documentation/section">here</a>
for the sections endpoint documentation.
title: Section
examples:
- media
- technology
- housing-network
order: 3
end_date:
title: End Date
type: string
description:
description: >-
(Optional) Use this to set the maximum date (YYYY-MM-DD) of the
results. Results newer than the end_date will not be shown. Default is set
to the current date (today) for incremental syncs.
results. Results newer than the end_date will not be shown. Default is
set to the current date (today) for incremental syncs.
title: End Date
pattern: ^([1-9][0-9]{3})\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01])$
examples:
- YYYY-MM-DD
order: 4
start_date:
type: string
description: >-
Use this to set the minimum date (YYYY-MM-DD) of the results. Results
older than the start_date will not be shown.
title: Start Date
pattern: ^([1-9][0-9]{3})\-(0?[1-9]|1[012])\-(0?[1-9]|[12][0-9]|3[01])$
examples:
- YYYY-MM-DD
order: 5
additionalProperties: true
# NOTE(review): looks like tooling-generated bookkeeping about the `content`
# stream (schema auto-import flag and recorded test outcomes); nothing in the
# visible manifest references it via $ref -- confirm before hand-editing.
metadata:
  autoImportSchema:
    content: false
  testedStreams:
    content:
      streamHash: 42367479876486b61ffefebbbe8bd86c341dc6d2
      hasResponse: true
      responsesAreSuccessful: true
      hasRecords: true
      primaryKeysArePresent: true
      primaryKeysAreUnique: true
  assist: {}
# JSON Schema (draft-04) for records of the `content` stream; the stream's
# schema_loader points here with $ref "#/schemas/content".
schemas:
  content:
    type: object
    $schema: http://json-schema.org/draft-04/schema#
    properties:
      type:
        type: string
      apiUrl:
        type: string
      id:
        type: string
      isHosted:
        type: boolean
      pillarId:
        type: string
      pillarName:
        type: string
      sectionId:
        type: string
      sectionName:
        type: string
      webPublicationDate:
        type: string
      webTitle:
        type: string
      webUrl:
        type: string
    # Every declared property is mandatory on a record.
    required:
      - id
      - type
      - sectionId
      - sectionName
      - webPublicationDate
      - webTitle
      - webUrl
      - apiUrl
      - isHosted
      - pillarId
      - pillarName
    # Fields not listed above are still accepted.
    additionalProperties: true

View File

@@ -20,7 +20,7 @@ data:
type: GSM
connectorType: source
definitionId: d42bd69f-6bf0-4d0b-9209-16231af07a92
dockerImageTag: 0.2.21
dockerImageTag: 0.2.22
dockerRepository: airbyte/source-the-guardian-api
documentationUrl: https://docs.airbyte.com/integrations/sources/the-guardian-api
githubIssueLabel: source-the-guardian-api

View File

@@ -113,6 +113,7 @@ The key that you are assigned is rate-limited and as such any applications that
| Version | Date | Pull Request | Subject |
|:--------|:-----------| :------------------------------------------------------- |:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 0.2.22 | 2025-04-22 | [58596](https://github.com/airbytehq/airbyte/pull/58596) | Manifest cleanup |
| 0.2.21 | 2025-04-12 | [57938](https://github.com/airbytehq/airbyte/pull/57938) | Update dependencies |
| 0.2.20 | 2025-04-05 | [57416](https://github.com/airbytehq/airbyte/pull/57416) | Update dependencies |
| 0.2.19 | 2025-03-29 | [56904](https://github.com/airbytehq/airbyte/pull/56904) | Update dependencies |