AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/patsy/tokens.py

# This file is part of Patsy
# Copyright (C) 2011 Nathaniel Smith <njs@pobox.com>
# See file LICENSE.txt for license information.

# Utilities for dealing with Python code at the token level.
#
# Includes:
#   a "pretty printer" that converts a sequence of tokens back into a
#       readable, white-space normalized string.
#   a utility function to replace calls to global functions with calls to
#       other functions

import tokenize
from six.moves import cStringIO as StringIO

from patsy import PatsyError
from patsy.origin import Origin

__all__ = ["python_tokenize", "pretty_untokenize",
           "normalize_token_spacing"]

# A convenience wrapper around tokenize.generate_tokens. yields tuples
#   (tokenize type, token string, origin object)
def python_tokenize(code):
    """Tokenize formula code, yielding one tuple per meaningful token.

    Newlines in `code` are replaced by spaces and surrounding whitespace is
    stripped before the text is handed to tokenize.generate_tokens.

    Yields tuples of (tokenize type, token string, Origin object). NL and
    NEWLINE tokens are skipped, and iteration stops at ENDMARKER.

    Raises PatsyError if the tokenizer produces an ERRORTOKEN or a COMMENT,
    or reports an unterminated string literal. A TokenError whose message
    contains "EOF in multi-line" is treated as a normal end of stream; any
    other TokenError is re-raised unchanged.
    """
    # Since formulas can only contain Python expressions, and Python
    # expressions cannot meaningfully contain newlines, we'll just remove all
    # the newlines up front to avoid any complications:
    code = code.replace("\n", " ").strip()
    it = tokenize.generate_tokens(StringIO(code).readline)
    try:
        # The tokenizer's per-token source line gets its own name so that
        # `code` keeps referring to the normalized input, which the error
        # handler below uses to build an Origin spanning the whole input.
        for (pytype, string, (_, start), (_, end), line) in it:
            if pytype == tokenize.ENDMARKER:
                break
            if pytype in (tokenize.NL, tokenize.NEWLINE):
                assert string == ""
                continue
            origin = Origin(line, start, end)
            if pytype == tokenize.ERRORTOKEN:
                raise PatsyError("error tokenizing input "
                                 "(maybe an unclosed string?)",
                                 origin)
            if pytype == tokenize.COMMENT:
                raise PatsyError("comments are not allowed", origin)
            yield (pytype, string, origin)
        else: # pragma: no cover
            raise ValueError("stream ended without ENDMARKER?!?")
    except tokenize.TokenError as e:
        # TokenError is raised when the tokenizer thinks that there is
        # some sort of multi-line construct in progress (e.g., an
        # unclosed parentheses, which in Python lets a virtual line
        # continue past the end of the physical line), and it hits the
        # end of the source text. We have our own error handling for
        # such cases, so just treat this as an end-of-stream.
        message = e.args[0]
        if "unterminated string literal" in message:
            raise PatsyError(
                "error tokenizing input ({})".format(message),
                Origin(code, 0, len(code)),
            )

        # Just in case someone adds some other error case: propagate it
        # explicitly rather than relying on an assert, which is stripped
        # under -O and would silently hide the error.
        if "EOF in multi-line" not in message:
            raise
        return

def test_python_tokenize():
    # A balanced expression: every token is reported with its column span.
    balanced = "a + (foo * -1)"
    balanced_tokens = list(python_tokenize(balanced))
    assert balanced_tokens == [
        (tokenize.NAME, "a", Origin(balanced, 0, 1)),
        (tokenize.OP, "+", Origin(balanced, 2, 3)),
        (tokenize.OP, "(", Origin(balanced, 4, 5)),
        (tokenize.NAME, "foo", Origin(balanced, 5, 8)),
        (tokenize.OP, "*", Origin(balanced, 9, 10)),
        (tokenize.OP, "-", Origin(balanced, 11, 12)),
        (tokenize.NUMBER, "1", Origin(balanced, 12, 13)),
        (tokenize.OP, ")", Origin(balanced, 13, 14)),
    ]

    # An unclosed parenthesis: the tokens before the end of the input are
    # still produced and no error is raised.
    unclosed = "a + (b"
    unclosed_tokens = list(python_tokenize(unclosed))
    assert unclosed_tokens == [
        (tokenize.NAME, "a", Origin(unclosed, 0, 1)),
        (tokenize.OP, "+", Origin(unclosed, 2, 3)),
        (tokenize.OP, "(", Origin(unclosed, 4, 5)),
        (tokenize.NAME, "b", Origin(unclosed, 5, 6)),
    ]

    import pytest

    # A comment and an unterminated string literal must both be rejected.
    for rejected in ("a b # c", "a b \"c"):
        pytest.raises(PatsyError, list, python_tokenize(rejected))

# Operators that pretty_untokenize surrounds with a space on both sides.
_python_space_both = [
    "+", "-", "*", "/", "%", "&", "^", "|", "<", ">",
    "==", "<>", "!=", "<=", ">=",
    "<<", ">>", "**", "//",
]
# Tokens that get a space before them: the two-sided operators plus "!", "~".
_python_space_before = _python_space_both + ["!", "~"]
# Tokens that get a space after them: the two-sided operators plus ",", ":".
_python_space_after = _python_space_both + [",", ":"]

def pretty_untokenize(typed_tokens):
    """Join (token type, token string) pairs into a spacing-normalized string.

    NEWLINE and ENDMARKER tokens are ignored; INDENT, DEDENT and NL tokens
    are not accepted (an assertion fails). Spacing around operators comes
    from the _python_space_before / _python_space_after tables, adjusted by
    the special cases commented below.
    """
    word_types = (tokenize.NAME, tokenize.NUMBER, tokenize.STRING)
    ignored_types = (tokenize.NEWLINE, tokenize.ENDMARKER)
    pieces = []
    open_brackets = []
    # State carried over from the previously emitted token:
    after_word = False                 # it was a NAME/NUMBER/STRING
    space_pending = False              # it asked for a space after itself
    after_open_paren_or_comma = False  # it was "(" or ","
    after_object = False               # it was a word or ")"
    for kind, tok in typed_tokens:
        assert kind not in (tokenize.INDENT, tokenize.DEDENT,
                            tokenize.NL)
        if kind in ignored_types:
            continue
        is_word = kind in word_types
        if is_word:
            # Adjacent words are always separated by a single space.
            pad_before = space_pending or after_word
            pad_after = False
        else:
            if tok in ("(", "[", "{"):
                open_brackets.append(tok)
            elif open_brackets and tok in (")", "]", "}"):
                open_brackets.pop()
            pad_before = tok in _python_space_before
            pad_after = tok in _python_space_after
            if tok == ":":
                # Slice syntax such as foo[:10] gets no space after ":";
                # elsewhere ("{1: ...}", "if a: ...") the space is kept.
                if open_brackets and open_brackets[-1] == "[":
                    pad_after = False
            elif tok in ("*", "**"):
                # Argument unpacking: foo(*args), foo(a, *args).
                if after_open_paren_or_comma:
                    pad_before = pad_after = False
            elif tok == "=":
                # "a = foo(b=1)": spaced only when outside any bracket.
                if not open_brackets:
                    pad_before = pad_after = True
            elif tok in ("+", "-"):
                # Heuristic for unary +/-: after something object-like (a
                # NAME, NUMBER, STRING, or close paren) the sign is probably
                # binary; otherwise it is probably unary and is not spaced.
                if not after_object:
                    pad_before = pad_after = False
            pad_before = space_pending or pad_before
        if pad_before:
            pieces.append(" ")
        pieces.append(tok)
        space_pending = pad_after
        after_word = is_word
        after_object = is_word or tok == ")"
        after_open_paren_or_comma = tok in ("(", ",")
    return "".join(pieces)

def normalize_token_spacing(code):
    """Tokenize `code` and re-render it through pretty_untokenize.

    Only the type and string of each token are passed on; the position
    information produced by the tokenizer is dropped.
    """
    readline = StringIO(code).readline
    typed_tokens = []
    for token_info in tokenize.generate_tokens(readline):
        typed_tokens.append((token_info[0], token_info[1]))
    return pretty_untokenize(typed_tokens)

def test_pretty_untokenize_and_normalize_token_spacing():
    # Each entry pairs an input with the spacing-normalized text expected
    # back from normalize_token_spacing.
    cases = [
        ("1 + 1", "1 + 1"),
        ("1+1", "1 + 1"),
        ("1*(2+3**2)", "1 * (2 + 3 ** 2)"),
        ("a and b", "a and b"),
        ("foo(a=bar.baz[1:])", "foo(a=bar.baz[1:])"),
        ("""{"hi":foo[:]}""", """{"hi": foo[:]}"""),
        ("""'a' "b" 'c'""", """'a' "b" 'c'"""),
        ('"""a""" is 1 or 2==3', '"""a""" is 1 or 2 == 3'),
        # Argument unpacking versus binary * and **:
        ("foo ( * args )", "foo(*args)"),
        ("foo ( a * args )", "foo(a * args)"),
        ("foo ( ** args )", "foo(**args)"),
        ("foo ( a ** args )", "foo(a ** args)"),
        ("foo (1, * args )", "foo(1, *args)"),
        ("foo (1, a * args )", "foo(1, a * args)"),
        ("foo (1, ** args )", "foo(1, **args)"),
        ("foo (1, a ** args )", "foo(1, a ** args)"),
        # Assignment versus keyword argument:
        ("a=foo(b = 1)", "a = foo(b=1)"),
        # Unary versus binary + and -:
        ("foo(+ 10, bar = - 1)", "foo(+10, bar=-1)"),
        ("1 + +10 + -1 - 5", "1 + +10 + -1 - 5"),
    ]
    for raw, normalized in cases:
        assert normalize_token_spacing(raw) == normalized
lab 1 is done 2024-10-02 22:15:59 +04:00			`# This file is part of Patsy`
			`# Copyright (C) 2011 Nathaniel Smith <njs@pobox.com>`
			`# See file LICENSE.txt for license information.`

			`# Utilities for dealing with Python code at the token level.`
			`#`
			`# Includes:`
			`# a "pretty printer" that converts a sequence of tokens back into a`
			`# readable, white-space normalized string.`
			`# a utility function to replace calls to global functions with calls to`
			`# other functions`

			`import tokenize`
			`from six.moves import cStringIO as StringIO`

			`from patsy import PatsyError`
			`from patsy.origin import Origin`

			`__all__ = ["python_tokenize", "pretty_untokenize",`
			`"normalize_token_spacing"]`

			`# A convenience wrapper around tokenize.generate_tokens. yields tuples`
			`# (tokenize type, token string, origin object)`
			`def python_tokenize(code):`
			`# Since formulas can only contain Python expressions, and Python`
			`# expressions cannot meaningfully contain newlines, we'll just remove all`
			`# the newlines up front to avoid any complications:`
			`code = code.replace("\n", " ").strip()`
			`it = tokenize.generate_tokens(StringIO(code).readline)`
			`try:`
			`for (pytype, string, (_, start), (_, end), code) in it:`
			`if pytype == tokenize.ENDMARKER:`
			`break`
			`if pytype in (tokenize.NL, tokenize.NEWLINE):`
			`assert string == ""`
			`continue`
			`origin = Origin(code, start, end)`
			`if pytype == tokenize.ERRORTOKEN:`
			`raise PatsyError("error tokenizing input "`
			`"(maybe an unclosed string?)",`
			`origin)`
			`if pytype == tokenize.COMMENT:`
			`raise PatsyError("comments are not allowed", origin)`
			`yield (pytype, string, origin)`
			`else: # pragma: no cover`
			`raise ValueError("stream ended without ENDMARKER?!?")`
			`except tokenize.TokenError as e:`
			`# TokenError is raised iff the tokenizer thinks that there is`
			`# some sort of multi-line construct in progress (e.g., an`
			`# unclosed parentheses, which in Python lets a virtual line`
			`# continue past the end of the physical line), and it hits the`
			`# end of the source text. We have our own error handling for`
			`# such cases, so just treat this as an end-of-stream.`
			`#`
			`if "unterminated string literal" in e.args[0]:`
			`raise PatsyError(`
			`"error tokenizing input ({})".format(e.args[0]),`
			`Origin(code, 0, len(code)),`
			`)`

			`# Just in case someone adds some other error case:`
			`assert "EOF in multi-line" in e.args[0]`
			`return`

			`def test_python_tokenize():`
			`code = "a + (foo * -1)"`
			`tokens = list(python_tokenize(code))`
			`expected = [(tokenize.NAME, "a", Origin(code, 0, 1)),`
			`(tokenize.OP, "+", Origin(code, 2, 3)),`
			`(tokenize.OP, "(", Origin(code, 4, 5)),`
			`(tokenize.NAME, "foo", Origin(code, 5, 8)),`
			`(tokenize.OP, "*", Origin(code, 9, 10)),`
			`(tokenize.OP, "-", Origin(code, 11, 12)),`
			`(tokenize.NUMBER, "1", Origin(code, 12, 13)),`
			`(tokenize.OP, ")", Origin(code, 13, 14))]`
			`assert tokens == expected`

			`code2 = "a + (b"`
			`tokens2 = list(python_tokenize(code2))`
			`expected2 = [(tokenize.NAME, "a", Origin(code2, 0, 1)),`
			`(tokenize.OP, "+", Origin(code2, 2, 3)),`
			`(tokenize.OP, "(", Origin(code2, 4, 5)),`
			`(tokenize.NAME, "b", Origin(code2, 5, 6))]`
			`assert tokens2 == expected2`

			`import pytest`
			`pytest.raises(PatsyError, list, python_tokenize("a b # c"))`

			`import pytest`
			`pytest.raises(PatsyError, list, python_tokenize("a b \"c"))`

			`_python_space_both = (list("+-*/%&^|<>")`
			`+ ["==", "<>", "!=", "<=", ">=",`
			`"<<", ">>", "**", "//"])`
			`_python_space_before = (_python_space_both`
			`+ ["!", "~"])`
			`_python_space_after = (_python_space_both`
			`+ [",", ":"])`

			`def pretty_untokenize(typed_tokens):`
			`text = []`
			`prev_was_space_delim = False`
			`prev_wants_space = False`
			`prev_was_open_paren_or_comma = False`
			`prev_was_object_like = False`
			`brackets = []`
			`for token_type, token in typed_tokens:`
			`assert token_type not in (tokenize.INDENT, tokenize.DEDENT,`
			`tokenize.NL)`
			`if token_type == tokenize.NEWLINE:`
			`continue`
			`if token_type == tokenize.ENDMARKER:`
			`continue`
			`if token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING):`
			`if prev_wants_space or prev_was_space_delim:`
			`text.append(" ")`
			`text.append(token)`
			`prev_wants_space = False`
			`prev_was_space_delim = True`
			`else:`
			`if token in ("(", "[", "{"):`
			`brackets.append(token)`
			`elif brackets and token in (")", "]", "}"):`
			`brackets.pop()`
			`this_wants_space_before = (token in _python_space_before)`
			`this_wants_space_after = (token in _python_space_after)`
			`# Special case for slice syntax: foo[:10]`
			`# Otherwise ":" is spaced after, like: "{1: ...}", "if a: ..."`
			`if token == ":" and brackets and brackets[-1] == "[":`
			`this_wants_space_after = False`
			`# Special case for foo(*args), foo(a, *args):`
			`if token in ("*", "**") and prev_was_open_paren_or_comma:`
			`this_wants_space_before = False`
			`this_wants_space_after = False`
			`# Special case for "a = foo(b=1)":`
			`if token == "=" and not brackets:`
			`this_wants_space_before = True`
			`this_wants_space_after = True`
			`# Special case for unary -, +. Our heuristic is that if we see the`
			`# + or - after something that looks like an object (a NAME,`
			`# NUMBER, STRING, or close paren) then it is probably binary,`
			`# otherwise it is probably unary.`
			`if token in ("+", "-") and not prev_was_object_like:`
			`this_wants_space_before = False`
			`this_wants_space_after = False`
			`if prev_wants_space or this_wants_space_before:`
			`text.append(" ")`
			`text.append(token)`
			`prev_wants_space = this_wants_space_after`
			`prev_was_space_delim = False`
			`if (token_type in (tokenize.NAME, tokenize.NUMBER, tokenize.STRING)`
			`or token == ")"):`
			`prev_was_object_like = True`
			`else:`
			`prev_was_object_like = False`
			`prev_was_open_paren_or_comma = token in ("(", ",")`
			`return "".join(text)`

			`def normalize_token_spacing(code):`
			`tokens = [(t[0], t[1])`
			`for t in tokenize.generate_tokens(StringIO(code).readline)]`
			`return pretty_untokenize(tokens)`

			`def test_pretty_untokenize_and_normalize_token_spacing():`
			`assert normalize_token_spacing("1 + 1") == "1 + 1"`
			`assert normalize_token_spacing("1+1") == "1 + 1"`
			`assert normalize_token_spacing("1*(2+3**2)") == "1 * (2 + 3 ** 2)"`
			`assert normalize_token_spacing("a and b") == "a and b"`
			`assert normalize_token_spacing("foo(a=bar.baz[1:])") == "foo(a=bar.baz[1:])"`
			`assert normalize_token_spacing("""{"hi":foo[:]}""") == """{"hi": foo[:]}"""`
			`assert normalize_token_spacing("""'a' "b" 'c'""") == """'a' "b" 'c'"""`
			`assert normalize_token_spacing('"""a""" is 1 or 2==3') == '"""a""" is 1 or 2 == 3'`
			`assert normalize_token_spacing("foo ( * args )") == "foo(*args)"`
			`assert normalize_token_spacing("foo ( a * args )") == "foo(a * args)"`
			`assert normalize_token_spacing("foo ( ** args )") == "foo(**args)"`
			`assert normalize_token_spacing("foo ( a ** args )") == "foo(a ** args)"`
			`assert normalize_token_spacing("foo (1, * args )") == "foo(1, *args)"`
			`assert normalize_token_spacing("foo (1, a * args )") == "foo(1, a * args)"`
			`assert normalize_token_spacing("foo (1, ** args )") == "foo(1, **args)"`
			`assert normalize_token_spacing("foo (1, a ** args )") == "foo(1, a ** args)"`

			`assert normalize_token_spacing("a=foo(b = 1)") == "a = foo(b=1)"`

			`assert normalize_token_spacing("foo(+ 10, bar = - 1)") == "foo(+10, bar=-1)"`
			`assert normalize_token_spacing("1 + +10 + -1 - 5") == "1 + +10 + -1 - 5"`