2024-10-02 22:15:59 +04:00

724 lines
21 KiB
Python

"""
Substantially copied from NumpyDoc 1.0pre.
"""
from collections import namedtuple
from collections.abc import Mapping
import copy
import inspect
import re
import textwrap
from statsmodels.tools.sm_exceptions import ParseError
def dedent_lines(lines):
    """Remove the common leading whitespace from a list of lines.

    The lines are joined with newlines, passed through ``textwrap.dedent``
    and split on newlines again, so the result is always a new list.
    """
    joined = "\n".join(lines)
    dedented = textwrap.dedent(joined)
    return dedented.split("\n")
def strip_blank_lines(line):
    """Drop blank entries from both ends of a list of lines, in place.

    Despite its name, ``line`` is a list of strings. Entries that are empty
    or whitespace-only are deleted from the front and from the back; blank
    entries between non-blank ones are kept. The same list object is
    returned.
    """
    # Count how many leading entries are blank, then delete them in one go.
    lead = 0
    while lead < len(line) and not line[lead].strip():
        lead += 1
    del line[:lead]

    # Find the position just past the last non-blank entry.
    keep = len(line)
    while keep > 0 and not line[keep - 1].strip():
        keep -= 1
    del line[keep:]

    return line
class Reader:
    """
    A line-based string reader.

    Holds a list of lines and a cursor (``_line_num``) pointing at the next
    line to be read. All ``read*`` and ``seek*`` methods advance the cursor;
    ``peek`` and ``__getitem__`` do not.
    """

    def __init__(self, data):
        """
        Parameters
        ----------
        data : str or list of str
            Either a single string whose lines are separated by newline
            characters, or a list of lines. A list is stored as given,
            without copying.
        """
        if isinstance(data, list):
            self._str = data
        else:
            self._str = data.split("\n")  # store string as list of lines
        self.reset()

    def __getitem__(self, n):
        """Return line ``n`` (or a slice of lines) without moving the cursor."""
        return self._str[n]

    def reset(self):
        """Move the cursor back to the first line."""
        self._line_num = 0  # current line nr

    def read(self):
        """Return the current line and advance; return "" at end of input."""
        if self.eof():
            return ""
        out = self[self._line_num]
        self._line_num += 1
        return out

    def seek_next_non_empty_line(self):
        """Advance the cursor past blank lines to the next non-blank line.

        If every remaining line is blank the cursor ends at end of input.
        """
        for line in self[self._line_num:]:
            if line.strip():
                break
            self._line_num += 1

    def eof(self):
        """Return True when the cursor is at or past the last line."""
        return self._line_num >= len(self._str)

    def read_to_condition(self, condition_func):
        """Read lines until ``condition_func(line)`` is true.

        Returns the lines from the starting cursor position up to, but not
        including, the first line for which the condition holds; the cursor
        is left on that line. If the input ends first, all remaining lines
        are returned. Returns an empty list when already at end of input.
        """
        start = self._line_num
        for line in self[start:]:
            if condition_func(line):
                return self[start:self._line_num]
            self._line_num += 1
            if self.eof():
                # Slicing past the end is harmless: this is "everything left".
                return self[start:self._line_num + 1]
        return []

    def read_to_next_empty_line(self):
        """Skip blank lines, then read up to the next blank line."""
        self.seek_next_non_empty_line()

        def is_empty(line):
            return not line.strip()

        return self.read_to_condition(is_empty)

    def read_to_next_unindented_line(self):
        """Read up to the next non-blank line that has no leading whitespace."""

        def is_unindented(line):
            return line.strip() and (len(line.lstrip()) == len(line))

        return self.read_to_condition(is_unindented)

    def peek(self, n=0):
        """Return the line ``n`` positions from the cursor without advancing.

        ``n`` may be negative to look backwards. Returns "" when the
        requested position is before the first line or past the last one.
        """
        idx = self._line_num + n
        # The lower bound matters: a negative index would otherwise wrap
        # around and return a line counted from the end of the list.
        if 0 <= idx < len(self._str):
            return self[idx]
        return ""

    def is_empty(self):
        """Return True when the input holds nothing but whitespace."""
        return not "".join(self._str).strip()
# One entry of a parameter-style section: its name, its type and its
# description.
Parameter = namedtuple("Parameter", ("name", "type", "desc"))
class NumpyDocString(Mapping):
    """Parses a numpydoc string to an abstract representation

    Instances define a mapping from section title to structured data.
    The docstring is parsed on construction; ``str(instance)`` renders the
    parsed data back to text.
    """

    # Default (empty) value for every recognised section. Deep-copied into
    # each instance so that instances never share mutable state.
    sections = {
        "Signature": "",
        "Summary": [""],
        "Extended Summary": [],
        "Parameters": [],
        "Returns": [],
        "Yields": [],
        "Receives": [],
        "Raises": [],
        "Warns": [],
        "Other Parameters": [],
        "Attributes": [],
        "Methods": [],
        "See Also": [],
        "Notes": [],
        "Warnings": [],
        "References": "",
        "Examples": "",
        "index": {},
    }

    # Matches a one-line call signature such as "a, b = mod.func(x, y)".
    _signature_rgx = re.compile(r"^([\w., ]+=)?\s*[\w\.]+\(.*\)$")

    def __init__(self, docstring):
        """
        Parameters
        ----------
        docstring : str
            The docstring to parse. It is dedented before parsing.

        Raises
        ------
        ParseError
            Re-raised from parsing, with the original docstring attached
            as ``e.docstring``.
        """
        orig_docstring = docstring
        docstring = textwrap.dedent(docstring).split("\n")
        self._doc = Reader(docstring)
        self._parsed_data = copy.deepcopy(self.sections)
        try:
            self._parse()
        except ParseError as e:
            e.docstring = orig_docstring
            raise

    def __getitem__(self, key):
        return self._parsed_data[key]

    def __setitem__(self, key, val):
        """Store a section; unknown section names raise via _error_location."""
        if key not in self._parsed_data:
            self._error_location("Unknown section %s" % key)
        else:
            self._parsed_data[key] = val

    def __iter__(self):
        return iter(self._parsed_data)

    def __len__(self):
        return len(self._parsed_data)

    def _is_at_section(self):
        """Skip blank lines and report whether a section header follows.

        A header is either a line starting with ``.. index::`` or a title
        line followed by a line of at least as many ``-`` or ``=``.
        """
        self._doc.seek_next_non_empty_line()
        if self._doc.eof():
            return False
        l1 = self._doc.peek().strip()  # e.g. Parameters
        if l1.startswith(".. index::"):
            return True
        l2 = self._doc.peek(1).strip()  # ---------- or ==========
        return l2.startswith("-" * len(l1)) or l2.startswith("=" * len(l1))

    def _strip(self, doc):
        """Return ``doc`` without its leading and trailing blank lines."""
        i = 0
        j = 0
        for i, line in enumerate(doc):
            if line.strip():
                break
        for j, line in enumerate(doc[::-1]):
            if line.strip():
                break
        return doc[i:len(doc) - j]

    def _read_to_next_section(self):
        """Read all lines up to the next section header or end of input.

        Paragraphs are read one at a time; a single empty string is
        inserted between paragraphs that were separated by a blank line.
        """
        section = self._doc.read_to_next_empty_line()
        while not self._is_at_section() and not self._doc.eof():
            if not self._doc.peek(-1).strip():  # previous line was empty
                section += [""]
            section += self._doc.read_to_next_empty_line()
        return section

    def _read_sections(self):
        """Yield ``(name, content_lines)`` for each remaining section."""
        while not self._doc.eof():
            data = self._read_to_next_section()
            if not data:
                # Nothing left to read; avoid indexing into an empty list.
                return
            name = data[0].strip()
            if name.startswith(".."):  # index section
                yield name, data[1:]
            elif len(data) < 2:
                # A title without an underline is not a section. End the
                # generator; yielding StopIteration would hand the
                # exception class to the consumer as if it were a section.
                return
            else:
                # Skip the title and its underline.
                yield name, self._strip(data[2:])

    def _parse_param_list(self, content, single_element_is_type=False):
        """Parse the body of a parameter-style section.

        Parameters
        ----------
        content : list of str
            The section body.
        single_element_is_type : bool
            How to read a header without " : ": as a type with empty name
            (True) or as a name with empty type (False).

        Returns
        -------
        list of Parameter
        """
        r = Reader(content)
        params = []
        while not r.eof():
            header = r.read().strip()
            if " : " in header:
                arg_name, arg_type = header.split(" : ")[:2]
            elif single_element_is_type:
                arg_name, arg_type = "", header
            else:
                arg_name, arg_type = header, ""
            desc = r.read_to_next_unindented_line()
            desc = dedent_lines(desc)
            desc = strip_blank_lines(desc)
            params.append(Parameter(arg_name, arg_type, desc))
        return params

    # See also supports the following formats.
    #
    # <FUNCNAME>
    # <FUNCNAME> SPACE* COLON SPACE+ <DESC> SPACE*
    # <FUNCNAME> ( COMMA SPACE+ <FUNCNAME>)+ (COMMA | PERIOD)? SPACE*
    # <FUNCNAME> ( COMMA SPACE+ <FUNCNAME>)* SPACE* COLON SPACE+ <DESC> SPACE*
    # <FUNCNAME> is one of
    # <PLAIN_FUNCNAME>
    # COLON <ROLE> COLON BACKTICK <PLAIN_FUNCNAME> BACKTICK
    # where
    # <PLAIN_FUNCNAME> is a legal function name, and
    # <ROLE> is any nonempty sequence of word characters.
    # Examples: func_f1 :meth:`func_h1` :obj:`~baz.obj_r` :class:`class_j`
    # <DESC> is a string describing the function.
    _role = r":(?P<role>\w+):"
    _funcbacktick = r"`(?P<name>(?:~\w+\.)?[a-zA-Z0-9_\.-]+)`"
    _funcplain = r"(?P<name2>[a-zA-Z0-9_\.-]+)"
    _funcname = r"(" + _role + _funcbacktick + r"|" + _funcplain + r")"
    # Same pattern with renamed groups so both can appear in one regex.
    _funcnamenext = _funcname.replace("role", "rolenext")
    _funcnamenext = _funcnamenext.replace("name", "namenext")
    _description = r"(?P<description>\s*:(\s+(?P<desc>\S+.*))?)?\s*$"
    _func_rgx = re.compile(r"^\s*" + _funcname + r"\s*")
    _line_rgx = re.compile(
        r"^\s*"
        + r"(?P<allfuncs>"
        + _funcname  # group for all function names
        + r"(?P<morefuncs>([,]\s+"
        + _funcnamenext
        + r")*)"
        + r")"
        +  # end of "allfuncs"
        # Some function lists have a trailing comma (or period)
        r"(?P<trailing>[,\.])?"
        + _description
    )
    # Empty <DESC> elements are replaced with '..'
    empty_description = ".."

    def _parse_see_also(self, content):
        """
        Parse the body of a See Also section.

        Accepted layout::

            func_name : Descriptive text
                continued text
            another_func_name : Descriptive text
            func_name1, func_name2, :meth:`func_name`, func_name3

        Returns
        -------
        list of (funcs, desc)
            ``funcs`` is a list of ``(name, role)`` tuples and ``desc`` a
            list of description lines.
        """
        items = []

        def parse_item_name(text):
            """Match ':role:`name`' or 'name'."""
            m = self._func_rgx.match(text)
            if not m:
                raise ParseError(f"{text} is not a item name")
            role = m.group("role")
            name = m.group("name") if role else m.group("name2")
            return name, role, m.end()

        # Description lines of the most recent item. The same list object
        # is stored in ``items``, so continuation lines appended here show
        # up in that item.
        rest = []
        for line in content:
            if not line.strip():
                continue
            line_match = self._line_rgx.match(line)
            description = None
            if line_match:
                description = line_match.group("desc")
                if line_match.group("trailing") and description:
                    self._error_location(
                        "Unexpected comma or period after function list at "
                        "index %d of line "
                        '"%s"' % (line_match.end("trailing"), line)
                    )
            if not description and line.startswith(" "):
                # Indented line without its own description: continuation.
                rest.append(line.strip())
            elif line_match:
                funcs = []
                text = line_match.group("allfuncs")
                while True:
                    if not text.strip():
                        break
                    name, role, match_end = parse_item_name(text)
                    funcs.append((name, role))
                    text = text[match_end:].strip()
                    if text and text[0] == ",":
                        text = text[1:].strip()
                rest = list(filter(None, [description]))
                items.append((funcs, rest))
            else:
                raise ParseError(f"{line} is not a item name")
        return items

    def _parse_index(self, section, content):
        """
        Parse an index directive such as::

            .. index:: default
               :refguide: something, else, and more

        Returns a dict with an optional "default" entry (a string) and one
        list of strings per ``:key:`` line.
        """

        def strip_each_in(lst):
            return [s.strip() for s in lst]

        out = {}
        section = section.split("::")
        if len(section) > 1:
            out["default"] = strip_each_in(section[1].split(","))[0]
        for line in content:
            line = line.split(":")
            if len(line) > 2:
                out[line[1]] = strip_each_in(line[2].split(","))
        return out

    def _parse_summary(self):
        """Grab signature (if given) and summary"""
        if self._is_at_section():
            return
        # If several signatures present, take the last one
        while True:
            summary = self._doc.read_to_next_empty_line()
            summary_str = " ".join([s.strip() for s in summary]).strip()
            if self._signature_rgx.match(summary_str):
                self["Signature"] = summary_str
                if not self._is_at_section():
                    continue
            break
        self["Summary"] = summary
        if not self._is_at_section():
            self["Extended Summary"] = self._read_to_next_section()

    def _parse(self):
        """Parse the whole docstring into ``self._parsed_data``.

        Raises
        ------
        ValueError
            If both Returns and Yields are present, if Receives is present
            without Yields, if a section appears twice, or if a section
            name is unknown.
        """
        self._doc.reset()
        self._parse_summary()
        sections = list(self._read_sections())
        section_names = {section for section, content in sections}
        has_returns = "Returns" in section_names
        has_yields = "Yields" in section_names
        # We could do more tests, but we are not. Arbitrarily.
        if has_returns and has_yields:
            msg = "Docstring contains both a Returns and Yields section."
            raise ValueError(msg)
        if not has_yields and "Receives" in section_names:
            msg = "Docstring contains a Receives section but not Yields."
            raise ValueError(msg)
        for (section, content) in sections:
            if not section.startswith(".."):
                # Normalise the title, e.g. "see also" -> "See Also".
                section = (s.capitalize() for s in section.split(" "))
                section = " ".join(section)
                if self.get(section):
                    self._error_location(
                        "The section %s appears twice" % section
                    )
            if section in (
                "Parameters",
                "Other Parameters",
                "Attributes",
                "Methods",
            ):
                self[section] = self._parse_param_list(content)
            elif section in (
                "Returns",
                "Yields",
                "Raises",
                "Warns",
                "Receives",
            ):
                self[section] = self._parse_param_list(
                    content, single_element_is_type=True
                )
            elif section.startswith(".. index::"):
                self["index"] = self._parse_index(section, content)
            elif section == "See Also":
                self["See Also"] = self._parse_see_also(content)
            else:
                self[section] = content

    def _error_location(self, msg):
        """Raise ValueError(msg), adding the source object when known."""
        if hasattr(self, "_obj"):
            # we know where the docs came from:
            try:
                filename = inspect.getsourcefile(self._obj)
            except TypeError:
                filename = None
            msg = msg + (
                f" in the docstring of {self._obj} in {filename}."
            )
        raise ValueError(msg)

    # string conversion routines
    def _str_header(self, name, symbol="-"):
        """Return the title line and a matching underline."""
        return [name, len(name) * symbol]

    def _str_indent(self, doc, indent=4):
        """Return a copy of ``doc`` with each line indented."""
        return [" " * indent + line for line in doc]

    def _str_signature(self):
        if self["Signature"]:
            return [self["Signature"].replace("*", r"\*")] + [""]
        return [""]

    def _str_summary(self):
        if self["Summary"]:
            return self["Summary"] + [""]
        return []

    def _str_extended_summary(self):
        if self["Extended Summary"]:
            return self["Extended Summary"] + [""]
        return []

    def _str_param_list(self, name):
        """Render a parameter-style section; empty sections give []."""
        out = []
        if self[name]:
            out += self._str_header(name)
            for param in self[name]:
                parts = []
                if param.name:
                    parts.append(param.name)
                if param.type:
                    parts.append(param.type)
                out += [" : ".join(parts)]
                if param.desc and "".join(param.desc).strip():
                    out += self._str_indent(param.desc)
            out += [""]
        return out

    def _str_section(self, name):
        """Render a free-text section; empty sections give []."""
        out = []
        if self[name]:
            out += self._str_header(name)
            out += self[name]
            out += [""]
        return out

    def _str_see_also(self, func_role):
        """Render the See Also section.

        ``func_role`` is the role applied to entries that carry none of
        their own; pass an empty string to emit bare names.
        """
        if not self["See Also"]:
            return []
        out = []
        out += self._str_header("See Also")
        last_had_desc = True
        for funcs, desc in self["See Also"]:
            assert isinstance(funcs, list)
            links = []
            for func, role in funcs:
                if role:
                    link = f":{role}:`{func}`"
                elif func_role:
                    link = f":{func_role}:`{func}`"
                else:
                    link = "%s" % func
                links.append(link)
            link = ", ".join(links)
            out += [link]
            if desc:
                out += self._str_indent([" ".join(desc)])
                last_had_desc = True
            else:
                last_had_desc = False
                out += self._str_indent([self.empty_description])
        if last_had_desc:
            out += [""]
        return out

    def _str_index(self):
        """Render the index directive as a list of lines.

        Always returns a list; it is empty when there is nothing to emit.
        """
        idx = self["index"]
        out = []
        default_index = idx.get("default", "")
        if default_index:
            out += [".. index:: %s" % default_index]
        for section, references in idx.items():
            if section == "default":
                continue
            out += [" :{}: {}".format(section, ", ".join(references))]
        return out

    def __str__(self, func_role=""):
        out = []
        out += self._str_signature()
        out += self._str_summary()
        out += self._str_extended_summary()
        for param_list in (
            "Parameters",
            "Returns",
            "Yields",
            "Receives",
            "Other Parameters",
            "Raises",
            "Warns",
        ):
            out += self._str_param_list(param_list)
        out += self._str_section("Warnings")
        out += self._str_see_also(func_role)
        for s in ("Notes", "References", "Examples"):
            out += self._str_section(s)
        for param_list in ("Attributes", "Methods"):
            out += self._str_param_list(param_list)
        out += self._str_index()
        return "\n".join(out)
class Docstring:
    """
    Docstring modification.

    Parameters
    ----------
    docstring : str
        The docstring to modify. If None, nothing is parsed and every
        modification method returns None without doing anything.
    """

    def __init__(self, docstring):
        self._ds = None
        self._docstring = docstring
        if docstring is None:
            return
        self._ds = NumpyDocString(docstring)

    def remove_parameters(self, parameters):
        """
        Remove entries from the Parameters section.

        Parameters
        ----------
        parameters : str, list[str]
            The names of the parameters to remove.

        Raises
        ------
        ValueError
            If any requested name is not in the Parameters section.
        """
        if self._docstring is None:
            # Protection against -oo execution
            return
        if isinstance(parameters, str):
            parameters = [parameters]
        current = self._ds["Parameters"]
        # Compare name sets rather than lengths so that a name listed twice
        # in ``parameters`` is not reported as missing.
        requested = set(parameters)
        present = {param.name for param in current}
        if requested - present:
            raise ValueError("One or more parameters were not found.")
        self._ds["Parameters"] = [
            param for param in current if param.name not in requested
        ]

    def insert_parameters(self, after, parameters):
        """
        Insert entries into the Parameters section.

        Parameters
        ----------
        after : {None, str}
            If None, insert the parameters before the first parameter in the
            docstring. Otherwise the name of the parameter after which the
            new ones are placed.
        parameters : Parameter, list[Parameter]
            A Parameter or a list of Parameters.

        Raises
        ------
        ValueError
            If ``after`` is not the name of an existing parameter.
        """
        if self._docstring is None:
            # Protection against -oo execution
            return
        if isinstance(parameters, Parameter):
            parameters = [parameters]
        if after is None:
            self._ds["Parameters"] = parameters + self._ds["Parameters"]
        else:
            loc = -1
            for i, param in enumerate(self._ds["Parameters"]):
                if param.name == after:
                    loc = i + 1
                    break
            if loc < 0:
                raise ValueError(
                    f"Parameter {after} was not found in the docstring"
                )
            params = self._ds["Parameters"][:loc] + parameters
            params += self._ds["Parameters"][loc:]
            self._ds["Parameters"] = params

    def replace_block(self, block_name, block):
        """
        Replace a whole section of the docstring.

        Parameters
        ----------
        block_name : str
            Name of the block to replace, e.g., 'Summary'. Each word is
            capitalized before lookup.
        block : object
            The replacement block. The structure of the replacement block must
            match how the block is stored by NumpyDocString. A non-list value
            is wrapped in a list when the existing block is a list.

        Raises
        ------
        ValueError
            If ``block_name`` is not a known block.
        """
        if self._docstring is None:
            # Protection against -oo execution
            return
        block_name = " ".join(map(str.capitalize, block_name.split(" ")))
        if block_name not in self._ds:
            raise ValueError(
                "{} is not a block in the " "docstring".format(block_name)
            )
        if not isinstance(block, list) and isinstance(
            self._ds[block_name], list
        ):
            block = [block]
        self._ds[block_name] = block

    def extract_parameters(self, parameters, indent=0):
        """
        Render selected parameters as text, without the section header.

        Parameters
        ----------
        parameters : str, list[str]
            The names of the parameters to extract, in output order.
        indent : int
            Number of spaces to indent the rendered text by.

        Returns
        -------
        str
            The rendered parameter entries.

        Raises
        ------
        ValueError
            If any requested name is not in the Parameters section.
        """
        if self._docstring is None:
            # Protection against -oo execution
            return
        if isinstance(parameters, str):
            parameters = [parameters]
        ds_params = {param.name: param for param in self._ds["Parameters"]}
        missing = set(parameters).difference(ds_params.keys())
        if missing:
            # Sorted so that the message is deterministic.
            raise ValueError(
                "{} were not found in the "
                "docstring".format(",".join(sorted(missing)))
            )
        final = [ds_params[param] for param in parameters]
        # Work on a copy in which every section but Parameters is emptied,
        # so that rendering it yields the Parameters section alone.
        ds = copy.deepcopy(self._ds)
        for key in ds:
            if key != "Parameters":
                ds[key] = [] if key != "index" else {}
            else:
                ds[key] = final
        out = str(ds).strip()
        if indent:
            out = textwrap.indent(out, " " * indent)
        # Drop the first two lines: the section title and its underline.
        out = "\n".join(out.split("\n")[2:])
        return out

    def __str__(self):
        # When built from None this is str(None), i.e. "None".
        return str(self._ds)
def remove_parameters(docstring, parameters):
    """
    Remove parameters from a docstring and return the new text.

    Parameters
    ----------
    docstring : str
        The docstring to modify.
    parameters : str, list[str]
        The names of the parameters to remove.

    Returns
    -------
    str
        The modified docstring. None is returned when ``docstring`` is None.
    """
    if docstring is not None:
        editor = Docstring(docstring)
        editor.remove_parameters(parameters)
        return str(editor)
    return None
def indent(text, prefix, predicate=None):
    """
    Non-protected indent

    Thin wrapper around ``textwrap.indent`` that also accepts None.

    Parameters
    ----------
    text : {None, str}
        If None, function always returns ""
    prefix : str
        Prefix to add to the start of each line
    predicate : callable, optional
        If provided, 'prefix' will only be added to the lines
        where 'predicate(line)' is True. If 'predicate' is not provided,
        it will default to adding 'prefix' to all non-empty lines that do not
        consist solely of whitespace characters.

    Returns
    -------
    str
        The indented text, or "" when ``text`` is None.
    """
    return (
        ""
        if text is None
        else textwrap.indent(text, prefix, predicate=predicate)
    )