1
0
mirror of synced 2026-01-02 12:02:47 -05:00
Files
airbyte/airbyte-integrations/connectors/source-mongodb/lib/mongodb_reader.rb
Yury Koleda b1061e32d9 🎉 Add MongoDB Source
Signed-off-by: fut <fut.wrk@gmail.com>
2021-03-08 14:27:14 -08:00

69 lines
1.9 KiB
Ruby

require_relative './airbyte_protocol.rb'
require_relative './airbyte_logger.rb'
require_relative './mongodb_stream.rb'
require_relative './mongodb_types_converter.rb'
require_relative './mongodb_configured_stream/factory.rb'
# Reads the configured streams of a catalog from MongoDB and prints one
# Airbyte record message (as JSON) per document to standard output.
class MongodbReader
  # Batch size requested from the MongoDB cursor.
  BATCH_SIZE = 10_000
  # A progress line is logged whenever the processed count is a multiple of this.
  LOG_BATCH_SIZE = 10_000

  # @param client [Object] indexed by stream name to obtain a collection
  #   (presumably a Mongo::Client — confirm against the caller)
  # @param catalog [Hash] configured catalog; only its 'streams' list is read here
  # @param state [Object] passed through unchanged to the configured-stream factory
  def initialize(client:, catalog:, state:)
    @client = client
    @catalog = catalog
    @state = state
  end

  # Builds a configured-stream wrapper for each catalog entry and reads the
  # ones that report themselves as valid. The "Reading stream" line is logged
  # for every stream, before validity is checked.
  #
  # @return [void]
  def read
    @catalog['streams'].each do |configured_stream|
      wrapper = MongodbConfiguredStream::Factory.build(configured_stream: configured_stream, state: @state, client: @client)
      AirbyteLogger.log("Reading stream #{wrapper.stream_name} in #{wrapper.sync_mode} mode")
      read_configured_stream(wrapper) if wrapper.valid?
    end
  end

  private

  # Streams every document matching the wrapper's query, converts its values
  # to the declared schema types, emits it, and notifies the wrapper.
  #
  # @param wrapper [Object] configured stream built by the factory
  # @return [void]
  def read_configured_stream(wrapper)
    collection = @client[wrapper.stream_name]
    # Looked up once per stream instead of once per field of every document.
    properties = wrapper.stream['json_schema']['properties']
    # NOTE(review): this is the size of the whole collection, not of the
    # composed query, so it is only an upper bound for filtered reads.
    full_count = collection.count

    cursor = collection.find(wrapper.compose_query)
                       .projection(projection_for(properties))
                       .batch_size(BATCH_SIZE)
    cursor.each do |item|
      convert_item_values(item, properties)
      emit_record(item, wrapper.stream_name)
      wrapper.after_item_processed(item)
      log_progress(wrapper, full_count)
    end

    wrapper.after_stream_processed
  end

  # Builds an inclusion projection selecting exactly the schema's properties.
  def projection_for(properties)
    properties.keys.each_with_object({}) { |key, spec| spec[key] = 1 }
  end

  # Converts, in place, each value of the document to its declared schema type.
  def convert_item_values(item, properties)
    item.each_pair do |key, value|
      declared = properties[key]
      # A returned field may be absent from the schema (MongoDB includes `_id`
      # by default under an inclusion projection). There is no declared type to
      # convert to, so the value is left untouched rather than failing on nil.
      next unless declared

      item[key] = MongodbTypesConverter.convert_value_to_type(value, declared['type'])
    end
  end

  # Wraps the document in an Airbyte record message and prints it as JSON.
  def emit_record(item, stream_name)
    record = AirbyteRecordMessage.from_dynamic!({
      'data' => item,
      'emitted_at' => Time.now.to_i * 1000, # epoch milliseconds, second resolution
      'stream' => stream_name,
    })
    message = AirbyteMessage.from_dynamic!({
      'type' => Type::Record,
      'record' => record.to_dynamic,
    })
    puts message.to_json
  end

  # Logs a progress line every LOG_BATCH_SIZE processed documents.
  def log_progress(wrapper, full_count)
    return unless (wrapper.processed_count % LOG_BATCH_SIZE).zero?

    AirbyteLogger.log("[#{wrapper.processed_count}/#{full_count}] Reading stream #{wrapper.stream_name} is in progress")
  end
end