AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/tsa/base/datetools.py

"""
Tools for working with dates
"""
from statsmodels.compat.python import asstr, lmap, lrange, lzip

import datetime
import re

import numpy as np
from pandas import to_datetime

# Last (month, day) of each quarter, keyed first by the arabic labels
# "1".."4" and then by the roman labels "I".."IV".
_quarter_to_day = dict(zip(
    ("1", "2", "3", "4", "I", "II", "III", "IV"),
    ((3, 31), (6, 30), (9, 30), (12, 31)) * 2,
))


# Days in each month of a non-leap year, January through December.
_mdays = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
# (month number, days in month) pairs in calendar order.
_months_with_days = lzip(lrange(1, 13), _mdays)
# Map the arabic labels "1".."12" to their (month, last day) pair ...
_month_to_day = {str(pair[0]): pair for pair in _months_with_days}
# ... and then the roman labels "I".."XII" to the same pairs.
_month_to_day.update(zip(
    ("I", "II", "III", "IV", "V", "VI",
     "VII", "VIII", "IX", "X", "XI", "XII"),
    _months_with_days,
))

# regex patterns
#
# All three are meant to be used with re.IGNORECASE | re.VERBOSE. The period
# alternatives accept only labels that exist in the quarter/month lookup
# tables, so a label that matches can always be resolved (e.g. "1999m20" or
# "1999qIIV" no longer match).

# A bare year of one to four digits.
_y_pattern = r'^\d?\d?\d?\d$'

_q_pattern = r'''
^               # beginning of string
\d?\d?\d?\d     # a year of one to four digits, leading zeros allowed

(:?q)           # an optional ':' followed by the letter q

([1-4]|(I{1,3}|IV)) # match 1-4 or I-IV roman numerals

$               # end of string
'''

_m_pattern = r'''
^               # beginning of string
\d?\d?\d?\d     # a year of one to four digits, leading zeros allowed

(:?m)           # an optional ':' followed by the letter m

((1[0-2]|[1-9])|(I[VX]|VI{0,3}|XI{0,2}|I{1,3}))  # match 1-12 or
                                                 # I-XII roman numerals

$               # end of string
'''


#NOTE: see also ts.extras.isleapyear, which accepts a sequence
def _is_leap(year):
    """
    Return True if ``year`` is a leap year in the Gregorian calendar.

    Parameters
    ----------
    year : int or str
        The year; it is converted with ``int`` before testing.

    Returns
    -------
    bool
        True for years divisible by 400, or divisible by 4 but not by 100.
    """
    yr = int(year)
    if yr % 400 == 0:
        return True
    if yr % 100 == 0:
        return False
    return yr % 4 == 0


def date_parser(timestr, parserinfo=None, **kwargs):
    """
    Convert a date string to a datetime, understanding abbreviated periods.

    Monthly labels such as 1999m4, 1999:m4, 1999:mIV and 1999mIV, the
    quarterly equivalents with q in place of m, and bare years are mapped
    to the last day of the period they name. Matching is not case
    sensitive. Any other string is handed to ``pandas.to_datetime``.

    Parameters
    ----------
    timestr : str
        The date string to parse.
    parserinfo : object, optional
        Accepted but not used by this function.
    **kwargs
        Forwarded to ``pandas.to_datetime`` when ``timestr`` is not an
        abbreviated quarterly, monthly or annual label.

    Returns
    -------
    datetime.datetime
        The end-of-period date for abbreviated labels; otherwise whatever
        ``pandas.to_datetime`` returns.
    """
    regex_flags = re.IGNORECASE | re.VERBOSE

    def _pieces(separator):
        # Drop the optional colon, then cut the label at the period marker.
        return timestr.replace(":", "").lower().split(separator)

    if re.search(_q_pattern, timestr, regex_flags):
        year_text, quarter = _pieces('q')
        month, day = _quarter_to_day[quarter.upper()]
        return datetime.datetime(int(year_text), month, day)

    if re.search(_m_pattern, timestr, regex_flags):
        year_text, month_label = _pieces('m')
        month, day = _month_to_day[month_label.upper()]
        year = int(year_text)
        # The lookup table holds non-leap lengths; February gains a day
        # in leap years.
        if month == 2 and _is_leap(year_text):
            day += 1
        return datetime.datetime(year, month, day)

    if re.search(_y_pattern, timestr, regex_flags):
        # A bare year stands for December 31 of that year.
        return datetime.datetime(int(timestr), 12, 31)

    return to_datetime(timestr, **kwargs)


def date_range_str(start, end=None, length=None):
    """
    Returns a list of abbreviated date strings.

    The range runs from ``start`` to ``end`` inclusive, or covers ``length``
    consecutive periods beginning at ``start`` when ``end`` is None. The
    frequency (monthly, quarterly or annual) is taken from ``start``.
    Ranges that begin and end in the same year, and ``length`` values that
    carry the range past a year boundary, are both handled.

    Parameters
    ----------
    start : str
        The first abbreviated date, for instance, '1965q1' or '1965m1'
    end : str, optional
        The last abbreviated date if length is None. It must use the same
        frequency as ``start``. Takes precedence over ``length``.
    length : int, optional
        The length of the returned list if end is None.

    Returns
    -------
    date_range : list
        List of strings, lower case and without a colon, e.g. '1965m1'.
        Annual ranges contain the bare year.

    Raises
    ------
    ValueError
        If ``start`` or ``end`` is not understood (this includes period
        numbers given as roman numerals, which are converted with ``int``),
        if neither ``end`` nor a non-zero ``length`` is given, or if the
        range would end before it starts.
    """
    flags = re.IGNORECASE | re.VERBOSE

    start = start.lower()
    if re.search(_m_pattern, start, flags):
        annual_freq, split = 12, 'm'
    elif re.search(_q_pattern, start, flags):
        annual_freq, split = 4, 'q'
    elif re.search(_y_pattern, start, flags):
        annual_freq, split = 1, 'a'
    else:
        raise ValueError("Date %s not understood" % start)

    def _period_index(label):
        # Number the periods consecutively across years so that the range
        # can be walked with plain integer arithmetic.
        text = label.lower().replace(":", "")
        if split == 'a':
            year, offset = int(text), 1
        else:
            pieces = text.split(split)
            if len(pieces) != 2:
                raise ValueError("Date %s not understood" % label)
            year, offset = int(pieces[0]), int(pieces[1])
        if not 1 <= offset <= annual_freq:
            raise ValueError("Date %s not understood" % label)
        return year * annual_freq + (offset - 1)

    first = _period_index(start)
    if end is not None:
        last = _period_index(end)
    else:
        if not length:
            raise ValueError("length must be provided if end is None")
        last = first + length - 1
    if last < first:
        raise ValueError("The date range must not end before it starts")

    date_arr_range = []
    for index in range(first, last + 1):
        year, offset = divmod(index, annual_freq)
        if split == 'a':
            date_arr_range.append(str(year))
        else:
            # offset is zero-based here; labels count periods from 1.
            date_arr_range.append("%d%s%d" % (year, split, offset + 1))
    return date_arr_range


def dates_from_str(dates):
    """
    Parse each date string in a sequence with ``date_parser``.

    Parameters
    ----------
    dates : array_like
        A sequence of abbreviated dates as strings, for instance
        '1996m1' or '1996Q1'. Abbreviated dates are mapped to the end
        of the period they name.

    Returns
    -------
    date_list : list
        One parsed date per element of ``dates``, in the same order.
    """
    return [date_parser(label) for label in dates]


def dates_from_range(start, end=None, length=None):
    """
    Build a list of datetimes covering a range of abbreviated dates.

    The range is first expanded to abbreviated strings with
    ``date_range_str`` and each string is then parsed by ``dates_from_str``.

    Parameters
    ----------
    start : str
        The first abbreviated date, for instance, '1965q1' or '1965m1'
    end : str, optional
        The last abbreviated date if length is None.
    length : int, optional
        The length of the returned list if end is None.

    Returns
    -------
    date_list : list
        A list of datetime types, one per period in the range.

    Examples
    --------
    >>> from statsmodels.tsa.base.datetools import dates_from_range
    >>> nobs = 50
    >>> dates = dates_from_range('1960m1', length=nobs)
    """
    return dates_from_str(date_range_str(start, end, length))
lab 1 is done 2024-10-02 22:15:59 +04:00			`"""`
			`Tools for working with dates`
			`"""`
			`from statsmodels.compat.python import asstr, lmap, lrange, lzip`

			`import datetime`
			`import re`

			`import numpy as np`
			`from pandas import to_datetime`

			`_quarter_to_day = {`
			`"1" : (3, 31),`
			`"2" : (6, 30),`
			`"3" : (9, 30),`
			`"4" : (12, 31),`
			`"I" : (3, 31),`
			`"II" : (6, 30),`
			`"III" : (9, 30),`
			`"IV" : (12, 31)`
			`}`


			`_mdays = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]`
			`_months_with_days = lzip(lrange(1,13), _mdays)`
			`_month_to_day = dict(zip(map(str,lrange(1,13)), _months_with_days))`
			`_month_to_day.update(dict(zip(["I", "II", "III", "IV", "V", "VI",`
			`"VII", "VIII", "IX", "X", "XI", "XII"],`
			`_months_with_days)))`

			`# regex patterns`
			`_y_pattern = r'^\d?\d?\d?\d$'`

			`_q_pattern = r'''`
			`^ # beginning of string`
			`\d?\d?\d?\d # match any number 1-9999, includes leading zeros`

			`(:?q) # use q or a : as a separator`

			`([1-4]\|(I{1,3}V?)) # match 1-4 or I-IV roman numerals`

			`$ # end of string`
			`'''`

			`_m_pattern = r'''`
			`^ # beginning of string`
			`\d?\d?\d?\d # match any number 1-9999, includes leading zeros`

			`(:?m) # use m or a : as a separator`

			`(([1-9][0-2]?)\|(I?XI{0,2}\|I?VI{0,3}\|I{1,3})) # match 1-12 or`
			`# I-XII roman numerals`

			`$ # end of string`
			`'''`


			`#NOTE: see also ts.extras.isleapyear, which accepts a sequence`
			`def _is_leap(year):`
			`year = int(year)`
			`return year % 4 == 0 and (year % 100 != 0 or year % 400 == 0)`


			`def date_parser(timestr, parserinfo=None, **kwargs):`
			`"""`
			`Uses dateutil.parser.parse, but also handles monthly dates of the form`
			`1999m4, 1999:m4, 1999:mIV, 1999mIV and the same for quarterly data`
			`with q instead of m. It is not case sensitive. The default for annual`
			`data is the end of the year, which also differs from dateutil.`
			`"""`
			`flags = re.IGNORECASE \| re.VERBOSE`
			`if re.search(_q_pattern, timestr, flags):`
			`y,q = timestr.replace(":","").lower().split('q')`
			`month, day = _quarter_to_day[q.upper()]`
			`year = int(y)`
			`elif re.search(_m_pattern, timestr, flags):`
			`y,m = timestr.replace(":","").lower().split('m')`
			`month, day = _month_to_day[m.upper()]`
			`year = int(y)`
			`if _is_leap(y) and month == 2:`
			`day += 1`
			`elif re.search(_y_pattern, timestr, flags):`
			`month, day = 12, 31`
			`year = int(timestr)`
			`else:`
			`return to_datetime(timestr, **kwargs)`

			`return datetime.datetime(year, month, day)`


			`def date_range_str(start, end=None, length=None):`
			`"""`
			`Returns a list of abbreviated date strings.`

			`Parameters`
			`----------`
			`start : str`
			`The first abbreviated date, for instance, '1965q1' or '1965m1'`
			`end : str, optional`
			`The last abbreviated date if length is None.`
			`length : int, optional`
			`The length of the returned array of end is None.`

			`Returns`
			`-------`
			`date_range : list`
			`List of strings`
			`"""`
			`flags = re.IGNORECASE \| re.VERBOSE`

			`start = start.lower()`
			`if re.search(_m_pattern, start, flags):`
			`annual_freq = 12`
			`split = 'm'`
			`elif re.search(_q_pattern, start, flags):`
			`annual_freq = 4`
			`split = 'q'`
			`elif re.search(_y_pattern, start, flags):`
			`annual_freq = 1`
			`start += 'a1' # hack`
			`if end:`
			`end += 'a1'`
			`split = 'a'`
			`else:`
			`raise ValueError("Date %s not understood" % start)`
			`yr1, offset1 = lmap(int, start.replace(":","").split(split))`
			`if end is not None:`
			`end = end.lower()`
			`yr2, offset2 = lmap(int, end.replace(":","").split(split))`
			`else: # length > 0`
			`if not length:`
			`raise ValueError("length must be provided if end is None")`
			`yr2 = yr1 + length // annual_freq`
			`offset2 = length % annual_freq + (offset1 - 1)`
			`years = [str(yr) for yr in np.repeat(lrange(yr1 + 1, yr2), annual_freq)]`
			`# tack on first year`
			`years = [(str(yr1))] * (annual_freq + 1 - offset1) + years`
			`# tack on last year`
			`years = years + [(str(yr2))] * offset2`
			`if split != 'a':`
			`offset = np.tile(np.arange(1, annual_freq + 1), yr2 - yr1 - 1).astype("S2")`
			`offset = np.r_[np.arange(offset1, annual_freq + 1).astype('S2'), offset]`
			`offset = np.r_[offset, np.arange(1, offset2 + 1).astype('S2')]`
			`date_arr_range = [''.join([i, split, asstr(j)])`
			`for i, j in zip(years, offset)]`
			`else:`
			`date_arr_range = years`
			`return date_arr_range`


			`def dates_from_str(dates):`
			`"""`
			`Turns a sequence of date strings and returns a list of datetime.`

			`Parameters`
			`----------`
			`dates : array_like`
			`A sequence of abbreviated dates as string. For instance,`
			`'1996m1' or '1996Q1'. The datetime dates are at the end of the`
			`period.`

			`Returns`
			`-------`
			`date_list : ndarray`
			`A list of datetime types.`
			`"""`
			`return lmap(date_parser, dates)`


			`def dates_from_range(start, end=None, length=None):`
			`"""`
			`Turns a sequence of date strings and returns a list of datetime.`

			`Parameters`
			`----------`
			`start : str`
			`The first abbreviated date, for instance, '1965q1' or '1965m1'`
			`end : str, optional`
			`The last abbreviated date if length is None.`
			`length : int, optional`
			`The length of the returned array of end is None.`

			`Examples`
			`--------`
			`>>> import statsmodels.api as sm`
			`>>> import pandas as pd`
			`>>> nobs = 50`
			`>>> dates = pd.date_range('1960m1', length=nobs)`


			`Returns`
			`-------`
			`date_list : ndarray`
			`A list of datetime types.`
			`"""`
			`dates = date_range_str(start, end, length)`
			`return dates_from_str(dates)`