198 lines
5.6 KiB
Python
198 lines
5.6 KiB
Python
|
"""
|
||
|
Tools for working with dates
|
||
|
"""
|
||
|
from statsmodels.compat.python import asstr, lmap, lrange, lzip
|
||
|
|
||
|
import datetime
|
||
|
import re
|
||
|
|
||
|
import numpy as np
|
||
|
from pandas import to_datetime
|
||
|
|
||
|
# Quarter label (arabic or roman numeral) -> (month, day) of the last day of
# that quarter.
_quarter_to_day = {
    "1": (3, 31),
    "2": (6, 30),
    "3": (9, 30),
    "4": (12, 31),
    "I": (3, 31),
    "II": (6, 30),
    "III": (9, 30),
    "IV": (12, 31),
}


# Number of days in each month of a non-leap year, January first.
_mdays = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
# (month number, days in month) pairs, i.e. the last day of every month.
_months_with_days = list(zip(range(1, 13), _mdays))
# Month label (arabic "1".."12" or roman "I".."XII") -> (month, last day).
_month_to_day = {
    str(month): (month, days) for month, days in _months_with_days
}
_month_to_day.update(zip(
    ["I", "II", "III", "IV", "V", "VI",
     "VII", "VIII", "IX", "X", "XI", "XII"],
    _months_with_days,
))
|
||
|
|
||
|
# regex patterns
# All three are meant to be used with ``re.IGNORECASE | re.VERBOSE``.
_y_pattern = r'^\d?\d?\d?\d$'

_q_pattern = r'''
    ^               # beginning of string
    \d?\d?\d?\d     # year: one to four digits, leading zeros allowed

    (:?q)           # separator: a q, optionally preceded by a colon

    ([1-4]|IV|I{1,3})   # quarter: 1-4 or the roman numerals I-IV only

    $               # end of string
'''

_m_pattern = r'''
    ^               # beginning of string
    \d?\d?\d?\d     # year: one to four digits, leading zeros allowed

    (:?m)           # separator: an m, optionally preceded by a colon

    (1[0-2]|[1-9]|XI{0,2}|IX|VI{0,3}|IV|I{1,3})   # month: 1-12 or the
                                                  # roman numerals I-XII only

    $               # end of string
'''
|
||
|
|
||
|
|
||
|
#NOTE: see also ts.extras.isleapyear, which accepts a sequence
|
||
|
def _is_leap(year):
    """
    Return True if ``year`` is a leap year in the Gregorian calendar.

    ``year`` may be anything accepted by ``int`` (e.g. an int or a string
    of digits).
    """
    yr = int(year)
    # Century years are leap years only when divisible by 400.
    if yr % 400 == 0:
        return True
    if yr % 100 == 0:
        return False
    return yr % 4 == 0
|
||
|
|
||
|
|
||
|
def date_parser(timestr, parserinfo=None, **kwargs):
    """
    Parse a date string, with support for abbreviated period notation.

    Monthly dates of the form 1999m4, 1999:m4, 1999:mIV, 1999mIV, the same
    forms for quarterly data with q instead of m, and bare years such as
    1999 are mapped to the last day of the period. Matching is not case
    sensitive. Every other string is handed to ``pandas.to_datetime``.

    Parameters
    ----------
    timestr : str
        The date string to parse.
    parserinfo : object, optional
        Not used by this function; kept so that callers passing it
        continue to work.
    **kwargs
        Forwarded to ``pandas.to_datetime`` when ``timestr`` is not in one
        of the abbreviated forms.

    Returns
    -------
    datetime.datetime
        End-of-period date for the abbreviated forms. For any other input
        the result of ``pandas.to_datetime(timestr, **kwargs)`` is returned
        unchanged.
    """
    flags = re.IGNORECASE | re.VERBOSE
    if re.search(_q_pattern, timestr, flags):
        y, q = timestr.replace(":", "").lower().split('q')
        # .get: a label that the pattern accepts but the table does not know
        # is treated as "not an abbreviated date" rather than a KeyError.
        month_day = _quarter_to_day.get(q.upper())
    elif re.search(_m_pattern, timestr, flags):
        y, m = timestr.replace(":", "").lower().split('m')
        month_day = _month_to_day.get(m.upper())
    elif re.search(_y_pattern, timestr, flags):
        # Annual data defaults to the end of the year.
        y, month_day = timestr, (12, 31)
    else:
        month_day = None

    if month_day is None:
        return to_datetime(timestr, **kwargs)

    year = int(y)
    month, day = month_day
    # The month table holds non-leap-year lengths; only a monthly date can
    # land on February, so this adjustment never affects quarters or years.
    if month == 2 and _is_leap(year):
        day += 1

    return datetime.datetime(year, month, day)
|
||
|
|
||
|
|
||
|
def _period_ordinal(label, split, annual_freq):
    """Return the number of whole periods between year 0 and ``label``."""
    compact = label.replace(":", "")
    if split == 'a':
        # Annual labels are just the year.
        return int(compact)
    parts = compact.split(split)
    if len(parts) != 2:
        raise ValueError("Date %s not understood" % label)
    year, offset = (int(part) for part in parts)
    if not 1 <= offset <= annual_freq:
        raise ValueError("Date %s not understood" % label)
    return year * annual_freq + (offset - 1)


def date_range_str(start, end=None, length=None):
    """
    Returns a list of abbreviated date strings.

    Parameters
    ----------
    start : str
        The first abbreviated date, for instance, '1965q1' or '1965m1'.
        A bare year such as '1965' produces an annual range. The period
        number must be written with arabic digits.
    end : str, optional
        The last abbreviated date (inclusive), in the same frequency as
        ``start``. Takes precedence over ``length`` when both are given.
    length : int, optional
        The length of the returned list if ``end`` is None.

    Returns
    -------
    date_range : list
        List of strings in compact form (no ':'), e.g.
        ``['1965q4', '1966q1']``; annual ranges contain only the year.

    Raises
    ------
    ValueError
        If ``start`` is not an abbreviated date, if ``start`` or ``end``
        has a period number outside the frequency, if ``end`` precedes
        ``start``, or if neither ``end`` nor a positive ``length`` is
        given.
    """
    flags = re.IGNORECASE | re.VERBOSE

    start = start.lower()
    if re.search(_m_pattern, start, flags):
        annual_freq, split = 12, 'm'
    elif re.search(_q_pattern, start, flags):
        annual_freq, split = 4, 'q'
    elif re.search(_y_pattern, start, flags):
        annual_freq, split = 1, 'a'
    else:
        raise ValueError("Date %s not understood" % start)

    # Work on a single linear period counter so that ranges inside one year
    # and ranges that roll over a year boundary need no special cases.
    first = _period_ordinal(start, split, annual_freq)
    if end is not None:
        last = _period_ordinal(end.lower(), split, annual_freq)
        if last < first:
            raise ValueError("end %s is before start %s" % (end, start))
    else:
        if not length:
            raise ValueError("length must be provided if end is None")
        if length < 0:
            raise ValueError("length must be positive")
        last = first + length - 1

    periods = range(first, last + 1)
    if split == 'a':
        return [str(period) for period in periods]
    return ["%d%s%d" % (period // annual_freq, split,
                        period % annual_freq + 1)
            for period in periods]
|
||
|
|
||
|
|
||
|
def dates_from_str(dates):
    """
    Convert a sequence of date strings into a list of datetimes.

    Parameters
    ----------
    dates : array_like
        A sequence of abbreviated dates as string. For instance,
        '1996m1' or '1996Q1'. The datetime dates are at the end of the
        period.

    Returns
    -------
    date_list : list
        The result of calling ``date_parser`` on every element of
        ``dates``, in the same order.
    """
    return [date_parser(date_str) for date_str in dates]
|
||
|
|
||
|
|
||
|
def dates_from_range(start, end=None, length=None):
    """
    Build a list of datetimes from an abbreviated start date and a range.

    The range of abbreviated date strings is generated with
    ``date_range_str`` and then converted with ``dates_from_str``.

    Parameters
    ----------
    start : str
        The first abbreviated date, for instance, '1965q1' or '1965m1'
    end : str, optional
        The last abbreviated date if length is None.
    length : int, optional
        The length of the returned list if end is None.

    Examples
    --------
    >>> nobs = 50
    >>> dates = dates_from_range('1960m1', length=nobs)

    Returns
    -------
    date_list : list
        One datetime per period in the range.
    """
    return dates_from_str(date_range_str(start, end, length))
|