#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#


from abc import ABC
from calendar import timegm
from datetime import datetime, timezone
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple

import feedparser
import pytz
import requests
from airbyte_cdk.sources import AbstractSource
from airbyte_cdk.sources.streams import Stream
from airbyte_cdk.sources.streams.http import HttpStream
from dateutil.parser import parse

# Entry fields copied verbatim into the output record whenever the entry provides them.
item_keys = [
    "title",
    "link",
    "description",
    "author",
    "category",
    "comments",
    "enclosure",
    "guid",
]


def convert_item_to_mapping(item) -> Mapping:
    """Flatten a parsed feed entry into a plain record dict.

    Every key listed in ``item_keys`` is copied when the entry provides it.
    When the entry carries a usable ``published_parsed`` time tuple, it is
    rendered under ``"published"`` as an ISO 8601 string with an explicit
    ``+00:00`` offset; otherwise ``"published"`` is left out of the record.

    Args:
        item: Entry supporting ``item[key]`` lookup and, optionally, a
            ``published_parsed`` attribute holding a time tuple.

    Returns:
        A new dict containing only the fields that could be extracted.
    """
    mapping = {}

    for item_key in item_keys:
        try:
            mapping[item_key] = item[item_key]
        except (AttributeError, KeyError):
            # field absent on this entry: simply leave it out of the record
            pass

    try:
        # timegm reads the time tuple as UTC and returns epoch seconds
        epoch_seconds = timegm(item.published_parsed)
        # build an aware datetime directly so the output string is labeled as UTC
        published_at = datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)
    except (AttributeError, KeyError, TypeError, ValueError, OverflowError, OSError):
        # AttributeError/KeyError: the entry has no publication date at all.
        # TypeError/ValueError: the date is None or not a well-formed time tuple.
        # OverflowError/OSError: the timestamp is outside the platform's range.
        # In every case the record is still emitted, just without "published".
        return mapping

    mapping["published"] = published_at.isoformat()
    return mapping


def is_newer(item, initial_state_date) -> bool:
    """Decide whether a record should be emitted given the saved cursor date.

    Args:
        item: Record mapping; its ``"published"`` value is parsed as a date.
        initial_state_date: Cursor date from the incoming state, or ``None``
            when there is no usable state.

    Returns:
        ``True`` when there is no cursor date, when the record's timestamp is
        missing or cannot be parsed, or when the record's timestamp is
        strictly later than the cursor date; ``False`` otherwise.
    """
    # without a cursor every record counts as new
    if initial_state_date is None:
        return True

    try:
        published_at = parse(item["published"])
    except Exception:
        published_at = None

    # a record whose timestamp can't be read is emitted rather than dropped
    return published_at is None or published_at > initial_state_date


# Basic stream
class RssStream(HttpStream, ABC):
    """Base stream that parses one HTTP response as a feed and emits its entries as records."""

    # kept empty because a stream can carry its own full URL
    url_base = ""

    def next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
        """Return ``None`` unconditionally: pagination is not used."""
        return None

    def parse_response(self, response: requests.Response, stream_state: MutableMapping[str, Any], **kwargs) -> Iterable[Mapping]:
        """Yield the feed's entries as records, skipping those not newer than the state.

        The whole feed arrives in a single response, so the incremental
        filtering has to happen here: only records accepted by ``is_newer``
        against the incoming state's ``"published"`` value are yielded.

        Args:
            response: HTTP response whose text is the feed document.
            stream_state: Incoming state; its ``"published"`` value, when
                present and parseable, is the cut-off date.
            **kwargs: Accepted and ignored.

        Yields:
            Record mappings produced by ``convert_item_to_mapping``, in the
            reverse of the feed's entry order.
        """
        # NOTE(review): response.text depends on requests' charset guess — confirm this
        # is right for feeds served without an explicit charset.
        parsed_feed = feedparser.parse(response.text)

        try:
            cutoff_date = parse(stream_state["published"])
        except Exception:
            # no state, or a state date that can't be parsed: nothing gets filtered out
            cutoff_date = None

        # walk the entries back to front, which helps the state comparisons
        records = list(map(convert_item_to_mapping, reversed(parsed_feed.entries)))

        # the filter only has an effect when a cut-off date exists, i.e. on incremental runs
        fresh_records = [record for record in records if is_newer(record, cutoff_date)]
        yield from fresh_records


# Basic incremental stream
class IncrementalRssStream(RssStream, ABC):
    """Feed stream that tracks the latest ``"published"`` value as its cursor."""

    # checkpointing brings nothing when a single un-paginated document is read
    state_checkpoint_interval = None

    @property
    def cursor_field(self) -> str:
        """Name of the record field used as the incremental cursor."""
        return "published"

    def get_updated_state(self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return whichever of the current state and the latest record has the later date.

        A ``"published"`` value that is missing or cannot be parsed is treated
        as absent rather than raising.

        Args:
            current_stream_state: State accumulated so far.
            latest_record: Record that was just read.

        Returns:
            ``{"published": <record value>}`` when the record's date is
            strictly later than the state's date or the state has no usable
            date; ``current_stream_state`` when only the state has a usable
            date or the record is not later; ``{}`` when neither has one.
        """
        try:
            record_date = parse(latest_record["published"])
            record_state = {"published": latest_record["published"]}
        except Exception:
            record_date = None

        try:
            state_date = parse(current_stream_state["published"])
        except Exception:
            state_date = None

        if not record_date:
            # nothing usable on the record: keep the state if it has a date, else start empty
            return current_stream_state if state_date else {}
        if not state_date:
            return record_state
        # both dates are usable: the state only moves forward on a strictly later record
        return record_state if record_date > state_date else current_stream_state


class Items(IncrementalRssStream):
    """Incremental stream over the entries of the feed located at ``url``."""

    # no primary key is declared for this stream
    primary_key = None

    def __init__(self, url: str):
        """Store the feed URL that ``path`` returns.

        Args:
            url: Full URL of the feed.
        """
        super().__init__()
        self.url = url

    def path(
        self,
        stream_state: Mapping[str, Any] = None,
        stream_slice: Mapping[str, Any] = None,
        next_page_token: Mapping[str, Any] = None,
    ) -> str:
        """Return the configured feed URL; the state, slice and page-token arguments are ignored."""
        return self.url


# Source
class SourceRss(AbstractSource):
    """Source that exposes the feed at the configured ``url`` as a single ``Items`` stream."""

    # Upper bound, in seconds, on the connection-check request; without a timeout
    # requests would wait indefinitely on an unresponsive server.
    _CHECK_TIMEOUT_SECONDS = 30

    def check_connection(self, logger, config) -> Tuple[bool, Any]:
        """Check that the configured feed URL answers with HTTP 200.

        Args:
            logger: Unused.
            config: Connector configuration; its ``"url"`` entry is requested.

        Returns:
            ``(True, None)`` on a 200 response. Otherwise ``(False, reason)``,
            where ``reason`` is a message naming the status code received, or
            the exception raised while making the request (including a timeout).
        """
        try:
            resp = requests.get(config.get("url"), timeout=self._CHECK_TIMEOUT_SECONDS)
            status = resp.status_code
            if status == 200:
                return True, None
            return False, f"Unable to connect to RSS Feed (received status code: {status})"
        except Exception as e:
            # report the failure to the caller instead of crashing the check
            return False, e

    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
        """Build the list of streams: one ``Items`` stream for the configured ``url``."""
        return [Items(config.get("url"))]