# -*- coding: utf-8 -*-
from __future__ import absolute_import, unicode_literals

import re
from datetime import datetime, timedelta

from dateutil import tz

from arrow import locales

try:
    from functools import lru_cache
except ImportError:  # pragma: no cover
    from backports.functools_lru_cache import lru_cache  # pragma: no cover


class ParserError(RuntimeError):
    """Raised by the parsers in this module when input cannot be handled.

    Covers format strings containing an unrecognized token, input that does
    not match the requested format(s), and timezone expressions that cannot
    be resolved.
    """


class DateTimeParser(object):
    """Parse date/time strings using token-based format strings.

    The class-level tables below describe the format tokens and the regular
    expression each locale-independent token accepts; locale-dependent
    tokens are added per instance in ``__init__``.
    """

    # Recognizes every format token inside a format string.
    _FORMAT_RE = re.compile(
        r"(YYY?Y?|MM?M?M?|Do|DD?D?D?|d?d?d?d|HH?|hh?|mm?|ss?|S+|ZZ?Z?|a|A|X)"
    )
    # A bracketed span (without nested brackets), e.g. "[at]".
    _ESCAPE_RE = re.compile(r"\[[^\[\]]*\]")

    # Building blocks shared by several tokens.
    _ONE_OR_MORE_DIGIT_RE = re.compile(r"\d+")
    _ONE_OR_TWO_DIGIT_RE = re.compile(r"\d{1,2}")
    _FOUR_DIGIT_RE = re.compile(r"\d{4}")
    _TWO_DIGIT_RE = re.compile(r"\d{2}")
    # Numeric UTC offset: optional sign, two digits, optional colon,
    # optional two more digits.
    _TZ_RE = re.compile(r"[+\-]?\d{2}:?(\d{2})?")
    # Timezone given by name; word characters plus "+", "-" and "/".
    _TZ_NAME_RE = re.compile(r"\w[\w+\-/]+")

    # Token -> pattern for the tokens that do not depend on the locale.
    # Each parser instance copies this mapping and extends it.
    _BASE_INPUT_RE_MAP = {
        "YYYY": _FOUR_DIGIT_RE,
        "YY": _TWO_DIGIT_RE,
        "MM": _TWO_DIGIT_RE,
        "M": _ONE_OR_TWO_DIGIT_RE,
        "DD": _TWO_DIGIT_RE,
        "D": _ONE_OR_TWO_DIGIT_RE,
        "HH": _TWO_DIGIT_RE,
        "H": _ONE_OR_TWO_DIGIT_RE,
        "hh": _TWO_DIGIT_RE,
        "h": _ONE_OR_TWO_DIGIT_RE,
        "mm": _TWO_DIGIT_RE,
        "m": _ONE_OR_TWO_DIGIT_RE,
        "ss": _TWO_DIGIT_RE,
        "s": _ONE_OR_TWO_DIGIT_RE,
        "X": re.compile(r"\d+"),
        "ZZZ": _TZ_NAME_RE,
        "ZZ": _TZ_RE,
        "Z": _TZ_RE,
        "S": _ONE_OR_MORE_DIGIT_RE,
    }

    # Date tokens and separators that parse_iso combines into the candidate
    # formats it tries for date-only input.
    MARKERS = ["YYYY", "MM", "DD"]
    SEPARATORS = ["-", "/", "."]

    def __init__(self, locale="en_us", cache_size=0):
        """Prepare the token patterns for the requested locale.

        :param locale: locale name handed to ``locales.get_locale``.
        :param cache_size: when greater than zero, results of
            ``_generate_pattern_re`` are memoised on this instance in an LRU
            cache holding that many entries.
        """
        self.locale = locales.get_locale(locale)
        make_choice = self._generate_choice_re

        # Start from the locale-independent table, then layer the
        # locale-specific patterns on top of a private copy.
        input_re_map = self._BASE_INPUT_RE_MAP.copy()
        input_re_map["MMMM"] = make_choice(
            self.locale.month_names[1:], re.IGNORECASE
        )
        input_re_map["MMM"] = make_choice(
            self.locale.month_abbreviations[1:], re.IGNORECASE
        )
        input_re_map["Do"] = re.compile(self.locale.ordinal_day_re)
        input_re_map["dddd"] = make_choice(self.locale.day_names[1:], re.IGNORECASE)
        input_re_map["ddd"] = make_choice(
            self.locale.day_abbreviations[1:], re.IGNORECASE
        )
        input_re_map["d"] = re.compile(r"[1-7]")
        input_re_map["a"] = make_choice(
            (self.locale.meridians["am"], self.locale.meridians["pm"])
        )
        # note: 'A' token accepts both 'am/pm' and 'AM/PM' formats to
        # ensure backwards compatibility of this token
        input_re_map["A"] = make_choice(self.locale.meridians.values())
        self._input_re_map = input_re_map

        if cache_size > 0:
            memoise = lru_cache(maxsize=cache_size)
            self._generate_pattern_re = memoise(self._generate_pattern_re)

    def parse_iso(self, string):
        """Parse an ISO-8601-like string by inferring its format.

        The input is inspected for a date/time divider (``T`` or a space),
        seconds, sub-seconds and a UTC offset; the matching format string(s)
        are then tried via ``_parse_multiformat``.

        :param string: the text to parse. Leading and trailing whitespace is
            ignored when working out the structure.
        :returns: the ``datetime`` produced by the first matching format.
        :raises ParserError: when none of the candidate formats match.
        """
        # Inspect the trimmed text so surrounding whitespace is never
        # mistaken for the date/time divider.
        trimmed = string.strip()
        space_divider = " " in trimmed
        has_time = space_divider or "T" in trimmed

        if has_time:
            divider = " " if space_divider else "T"
            time_string = trimmed.split(divider, 1)[1]

            # Anything after the first sign in the time part is the offset.
            time_parts = re.split("[+-]", time_string, 1)
            has_tz = len(time_parts) > 1
            has_seconds = time_parts[0].count(":") > 1
            has_subseconds = re.search("[.,]", time_parts[0])

            if has_subseconds:
                # Reuse whichever separator (".", ",") the input used.
                formats = ["YYYY-MM-DDTHH:mm:ss%sS" % has_subseconds.group()]
            elif has_seconds:
                formats = ["YYYY-MM-DDTHH:mm:ss"]
            else:
                formats = ["YYYY-MM-DDTHH:mm"]

            if has_tz:
                formats = [f + "Z" for f in formats]
        else:
            # generate required formats: YYYY-MM-DD, YYYY-MM, YYYY
            # using various separators: -, /, .
            len_markers = len(self.MARKERS)
            formats = [
                separator.join(self.MARKERS[: len_markers - i])
                for i in range(len_markers)
                for separator in self.SEPARATORS
            ]

        if space_divider:
            formats = [item.replace("T", " ", 1) for item in formats]

        return self._parse_multiformat(string, formats)

    def _generate_pattern_re(self, fmt):
        """Translate a format string into a compiled regular expression.

        ``fmt`` is a string of tokens like 'YYYY-MM-DD'. Each token is
        replaced by a named group holding its input pattern, e.g.
        'YYYY-MM-DD' -> '(?P<YYYY>\\d{4})-(?P<MM>\\d{2})-(?P<DD>\\d{2})',
        while all other text is regex-escaped. Text inside square brackets
        is not scanned for tokens; it is inserted into the pattern as
        written, minus the brackets.

        :param fmt: the format string.
        :returns: ``(tokens, pattern)`` where ``tokens`` lists the tokens in
            the order they occur and ``pattern`` is the compiled,
            case-insensitive regular expression.
        :raises ParserError: when ``fmt`` contains a token with no known
            input pattern.
        """
        tokens = []

        def to_named_group(found):
            token = found.group(0)
            try:
                input_re = self._input_re_map[token]
            except KeyError:
                raise ParserError("Unrecognized token '{}'".format(token))
            tokens.append(token)
            return "(?P<{}>{})".format(token, input_re.pattern)

        # Split the raw format around the bracketed spans. Working on
        # separate chunks means no placeholder character has to be reserved,
        # so a literal '#' in the format is preserved.
        plain_chunks = self._ESCAPE_RE.split(fmt)
        bracketed = self._ESCAPE_RE.findall(fmt)

        pieces = []
        for index, chunk in enumerate(plain_chunks):
            # Escape all special RegEx chars; any number of S is the same
            # as one.
            escaped_chunk = re.sub("S+", "S", re.escape(chunk))
            # A callable replacement is inserted verbatim, so backslashes
            # in the token patterns are left untouched.
            pieces.append(self._FORMAT_RE.sub(to_named_group, escaped_chunk))
            # split() always yields one more chunk than there are
            # bracketed spans.
            if index < len(bracketed):
                pieces.append(bracketed[index][1:-1])

        return tokens, re.compile("".join(pieces), flags=re.IGNORECASE)

    def parse(self, string, fmt):
        """Parse ``string`` according to ``fmt``.

        :param string: the text to parse.
        :param fmt: a format string, or a list of format strings that are
            tried in order.
        :returns: the resulting ``datetime``.
        :raises ParserError: when the format's pattern is not found in
            ``string``.
        """
        if isinstance(fmt, list):
            return self._parse_multiformat(string, fmt)

        token_names, compiled = self._generate_pattern_re(fmt)

        found = compiled.search(string)
        if found is None:
            message = "Failed to match '{}' when parsing '{}'".format(
                compiled.pattern, string
            )
            raise ParserError(message)

        collected = {}
        for name in token_names:
            # The ordinal-day token is read from the group named "value"
            # rather than from a group named after the token.
            group_name = "value" if name == "Do" else name
            self._parse_token(name, found.group(group_name), collected)

        return self._build_datetime(collected)

    def _parse_token(self, token, value, parts):
        """Convert the text matched for one token and store it in ``parts``.

        :param token: the format token, e.g. ``"YYYY"``, ``"MMM"``, ``"a"``.
        :param value: the text captured for that token.
        :param parts: dict updated in place. Depending on the token, one of
            the keys ``year``, ``month``, ``day``, ``hour``, ``minute``,
            ``second``, ``microsecond``, ``timestamp``, ``tzinfo`` or
            ``am_pm`` is written. Tokens not handled here leave it untouched.
        """
        if token == "YYYY":
            parts["year"] = int(value)

        elif token == "YY":
            # Two-digit years pivot at 68: 69-99 -> 1969-1999, 00-68 -> 2000-2068.
            value = int(value)
            parts["year"] = 1900 + value if value > 68 else 2000 + value

        elif token in ["MMMM", "MMM"]:
            parts["month"] = self.locale.month_number(value.lower())

        elif token in ["MM", "M"]:
            parts["month"] = int(value)

        elif token in ["DD", "D", "Do"]:
            parts["day"] = int(value)

        elif token.upper() in ["HH", "H"]:
            parts["hour"] = int(value)

        elif token in ["mm", "m"]:
            parts["minute"] = int(value)

        elif token in ["ss", "s"]:
            parts["second"] = int(value)

        elif token == "S":
            # We have the *most significant* digits of an arbitrary-precision integer.
            # We want the six most significant digits as an integer, rounded.
            # FIXME: add nanosecond support somehow?
            value = value.ljust(7, str("0"))

            # floating-point (IEEE-754) defaults to half-to-even rounding
            seventh_digit = int(value[6])
            if seventh_digit == 5:
                rounding = int(value[5]) % 2
            elif seventh_digit > 5:
                rounding = 1
            else:
                rounding = 0

            parts["microsecond"] = int(value[:6]) + rounding

        elif token == "X":
            parts["timestamp"] = int(value)

        elif token in ["ZZZ", "ZZ", "Z"]:
            parts["tzinfo"] = TzinfoParser.parse(value)

        elif token in ["a", "A"]:
            # Format patterns are compiled case-insensitively, so mixed-case
            # input such as "Pm" reaches this point. Compare without regard
            # to case so it is not silently treated as "no meridian".
            meridians = self.locale.meridians
            folded = value.lower()
            if folded in (meridians["am"].lower(), meridians["AM"].lower()):
                parts["am_pm"] = "am"
            elif folded in (meridians["pm"].lower(), meridians["PM"].lower()):
                parts["am_pm"] = "pm"

    @staticmethod
    def _build_datetime(parts):

        timestamp = parts.get("timestamp")

        if timestamp:
            tz_utc = tz.tzutc()
            return datetime.fromtimestamp(timestamp, tz=tz_utc)

        am_pm = parts.get("am_pm")
        hour = parts.get("hour", 0)

        if am_pm == "pm" and hour < 12:
            hour += 12
        elif am_pm == "am" and hour == 12:
            hour = 0

        # account for rounding up to 1000000
        microsecond = parts.get("microsecond", 0)
        if microsecond == 1000000:
            microsecond = 0
            second_increment = 1
        else:
            second_increment = 0

        increment = timedelta(seconds=second_increment)

        return (
            datetime(
                year=parts.get("year", 1),
                month=parts.get("month", 1),
                day=parts.get("day", 1),
                hour=hour,
                minute=parts.get("minute", 0),
                second=parts.get("second", 0),
                microsecond=microsecond,
                tzinfo=parts.get("tzinfo"),
            )
            + increment
        )

    def _parse_multiformat(self, string, formats):
        """Parse ``string`` with the first format in ``formats`` that matches.

        :param string: the text to parse.
        :param formats: iterable of format strings, tried in order.
        :returns: the ``datetime`` from the first format that parses.
        :raises ParserError: when every format fails with ``ParserError``.
        """
        parsed = None

        for candidate in formats:
            try:
                parsed = self.parse(string, candidate)
            except ParserError:
                # This format did not fit; move on to the next one.
                continue
            break

        if parsed is not None:
            return parsed

        raise ParserError(
            "Could not match input to any of {} on '{}'".format(formats, string)
        )

    @staticmethod
    def _map_lookup(input_map, key):

        try:
            return input_map[key]
        except KeyError:
            raise ParserError('Could not match "{}" to {}'.format(key, input_map))

    @staticmethod
    def _try_timestamp(string):

        try:
            return float(string)
        except Exception:
            return None

    # generates a capture group of choices separated by an OR operator
    @staticmethod
    def _generate_choice_re(choices, flags=0):
        return re.compile(r"({})".format("|".join(choices)), flags=flags)


class TzinfoParser(object):
    """Turn a timezone expression into a ``tzinfo`` object."""

    # Optional sign, two hour digits, optional colon, optional two minute
    # digits -- e.g. "+01:00", "-0530", "02".
    _TZINFO_RE = re.compile(r"([+\-])?(\d\d):?(\d\d)?")

    @classmethod
    def parse(cls, string):
        """Resolve ``string`` to a ``tzinfo``.

        Accepted forms, checked in this order:

        * ``"local"`` -- ``tz.tzlocal()``
        * ``"utc"`` / ``"UTC"`` -- ``tz.tzutc()``
        * text starting with a numeric offset -- ``tz.tzoffset``
        * anything else -- looked up with ``tz.gettz``

        :raises ParserError: when no ``tzinfo`` could be produced.
        """
        if string == "local":
            resolved = tz.tzlocal()

        elif string in ["utc", "UTC"]:
            resolved = tz.tzutc()

        else:
            offset_match = cls._TZINFO_RE.match(string)

            if offset_match is None:
                # Not an offset; treat it as a timezone name.
                resolved = tz.gettz(string)
            else:
                sign, hours, minutes = offset_match.groups()
                # The minutes group is None when the offset has hours only.
                total_seconds = int(hours) * 3600 + int(minutes or 0) * 60
                if sign == "-":
                    total_seconds = -total_seconds
                resolved = tz.tzoffset(None, total_seconds)

        if resolved is None:
            raise ParserError('Could not parse timezone expression "{}"'.format(string))

        return resolved