from __future__ import annotations

import re
from functools import lru_cache
from itertools import chain, count
from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple

try:
    from lxml import etree
except ImportError:
    # lxml is required for subsetting SVG, but we prefer to delay the import error
    # until subset_glyphs() is called (i.e. if the font to subset has an 'SVG ' table)
    etree = None

from fontTools import ttLib
from fontTools.subset.util import _add_method
from fontTools.ttLib.tables.S_V_G_ import SVGDocument


__all__ = ["subset_glyphs"]


GID_RE = re.compile(r"^glyph(\d+)$")

NAMESPACES = {
    "svg": "http://www.w3.org/2000/svg",
    "xlink": "http://www.w3.org/1999/xlink",
}
XLINK_HREF = f'{{{NAMESPACES["xlink"]}}}href'


# TODO(anthrotype): Replace with functools.cache once we are 3.9+
@lru_cache(maxsize=None)
def xpath(path):
    # compile XPath upfront, caching result to reuse on multiple elements
    return etree.XPath(path, namespaces=NAMESPACES)


def group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]:
    # select all svg elements with 'id' attribute no matter where they are,
    # including the root element itself:
    # https://github.com/fonttools/fonttools/issues/2548
    return {el.attrib["id"]: el for el in xpath("//svg:*[@id]")(tree)}


def parse_css_declarations(style_attr: str) -> Dict[str, str]:
    # https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style
    # https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations
    result = {}
    for declaration in style_attr.split(";"):
        if declaration.count(":") == 1:
            property_name, value = declaration.split(":")
            property_name = property_name.strip()
            result[property_name] = value.strip()
        elif declaration.strip():
            raise ValueError(f"Invalid CSS declaration syntax: {declaration}")
    return result
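
# Illustrative example (comment only, not part of the library code):
# parse_css_declarations("fill:url(#grad1); stroke: none") returns
# {"fill": "url(#grad1)", "stroke": "none"}; any non-empty declaration that
# doesn't contain exactly one ":" raises ValueError.
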
def iter_referenced_ids(tree: etree.Element) -> Iterator[str]:
    # Yield all the ids that can be reached via references from this element tree.
    # We currently support xlink:href (as used by <use> and gradient templates),
    # and local url(#...) links found in fill or clip-path attributes
    # TODO(anthrotype): Check we aren't missing other supported kinds of reference
    find_svg_elements_with_references = xpath(
        ".//svg:*[ "
        "starts-with(@xlink:href, '#') "
        "or starts-with(@fill, 'url(#') "
        "or starts-with(@clip-path, 'url(#') "
        "or contains(@style, ':url(#') "
        "]",
    )
    for el in chain([tree], find_svg_elements_with_references(tree)):
        ref_id = href_local_target(el)
        if ref_id is not None:
            yield ref_id

        attrs = el.attrib
        if "style" in attrs:
            attrs = {**dict(attrs), **parse_css_declarations(el.attrib["style"])}
        for attr in ("fill", "clip-path"):
            if attr in attrs:
                value = attrs[attr]
                if value.startswith("url(#") and value.endswith(")"):
                    ref_id = value[5:-1]
                    assert ref_id
                    yield ref_id
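
# Illustrative example (comment only, not part of the library code): for a tree
# whose root is <use xlink:href="#glyph2" fill="url(#grad1)"
# style="clip-path:url(#clip3)"/>, iter_referenced_ids yields "glyph2", "grad1"
# and "clip3".
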
def closure_element_ids(
    elements: Dict[str, etree.Element], element_ids: Set[str]
) -> None:
    # Expand the initial subset of element ids to include ids that can be reached
    # via references from the initial set.
    unvisited = element_ids
    while unvisited:
        referenced: Set[str] = set()
        for el_id in unvisited:
            if el_id not in elements:
                # ignore dangling reference; not our job to validate svg
                continue
            referenced.update(iter_referenced_ids(elements[el_id]))
        referenced -= element_ids
        element_ids.update(referenced)
        unvisited = referenced
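
# Illustrative example (comment only, not part of the library code): if "glyph1"
# references "grad1" and "grad1" in turn references "grad0", then starting from
# element_ids == {"glyph1"} the closure expands it in place to
# {"glyph1", "grad1", "grad0"}; ids not present in `elements` are ignored.
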
def subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool:
    # Keep elements if their id is in the subset, or any of their children's id is.
    # Drop elements whose id is not in the subset, and either have no children,
    # or all their children are being dropped.
    if el.attrib.get("id") in retained_ids:
        # if id is in the set, don't recurse; keep whole subtree
        return True
    # recursively subset all the children; we use a list comprehension instead
    # of a parentheses-less generator expression because we don't want any() to
    # short-circuit, as our function has a side effect of dropping empty elements.
    if any([subset_elements(e, retained_ids) for e in el]):
        return True
    assert len(el) == 0
    parent = el.getparent()
    if parent is not None:
        parent.remove(el)
    return False
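
# Illustrative example (comment only, not part of the library code): with
# retained_ids == {"glyph1"}, an <svg> root containing <g id="glyph1"> and
# <g id="glyph2"> keeps the first group (whole subtree), removes the second
# from its parent, and subset_elements returns True for the root.
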
def remap_glyph_ids(
    svg: etree.Element, glyph_index_map: Dict[int, int]
) -> Dict[str, str]:
    # Given a {old_gid: new_gid} map, rename all elements whose id attribute is
    # "glyph{gid}" so that they match the new glyph indices
    elements = group_elements_by_id(svg)
    id_map = {}
    for el_id, el in elements.items():
        m = GID_RE.match(el_id)
        if not m:
            continue
        old_index = int(m.group(1))
        new_index = glyph_index_map.get(old_index)
        if new_index is not None:
            if old_index == new_index:
                continue
            new_id = f"glyph{new_index}"
        else:
            # If the old index is missing, the element corresponds to a glyph that was
            # excluded from the font's subset.
            # We rename it to avoid clashes with the new GIDs or other element ids.
            new_id = f".{el_id}"
            n = count(1)
            while new_id in elements:
                new_id = f"{new_id}.{next(n)}"

        id_map[el_id] = new_id
        el.attrib["id"] = new_id

    return id_map
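
# Illustrative example (comment only, not part of the library code): with
# glyph_index_map == {2: 1}, an element with id="glyph2" is renamed to "glyph1"
# and id_map records {"glyph2": "glyph1"}; an element with id="glyph5" whose
# glyph was dropped from the subset is renamed to ".glyph5" (with numeric
# suffixes appended until the id is unused).
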
def href_local_target(el: etree.Element) -> Optional[str]:
    if XLINK_HREF in el.attrib:
        href = el.attrib[XLINK_HREF]
        if href.startswith("#") and len(href) > 1:
            return href[1:]  # drop the leading #
    return None


def update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None:
    # update all xlink:href="#glyph..." attributes to point to the new glyph ids
    for el in xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")(svg):
        old_id = href_local_target(el)
        assert old_id is not None
        if old_id in id_map:
            new_id = id_map[old_id]
            el.attrib[XLINK_HREF] = f"#{new_id}"


def ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]:
    # Yield sorted, non-overlapping (min, max) ranges of consecutive integers
    sorted_ints = iter(sorted(set(ints)))
    try:
        start = end = next(sorted_ints)
    except StopIteration:
        return
    for v in sorted_ints:
        if v - 1 == end:
            end = v
        else:
            yield (start, end)
            start = end = v
    yield (start, end)
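
# Illustrative example (comment only, not part of the library code):
# list(ranges([3, 1, 2, 7, 6])) == [(1, 3), (6, 7)]; duplicates are collapsed
# and the input order does not matter.
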
@_add_method(ttLib.getTableClass("SVG "))
def subset_glyphs(self, s) -> bool:
    if etree is None:
        raise ImportError("No module named 'lxml', required to subset SVG")

    # glyph names (before subsetting)
    glyph_order: List[str] = s.orig_glyph_order
    # map from glyph names to original glyph indices
    rev_orig_glyph_map: Dict[str, int] = s.reverseOrigGlyphMap
    # map from original to new glyph indices (after subsetting)
    glyph_index_map: Dict[int, int] = s.glyph_index_map

    new_docs: List[SVGDocument] = []
    for doc in self.docList:
        glyphs = {
            glyph_order[i] for i in range(doc.startGlyphID, doc.endGlyphID + 1)
        }.intersection(s.glyphs)
        if not glyphs:
            # no intersection: we can drop the whole record
            continue

        svg = etree.fromstring(
            # encode because fromstring dislikes xml encoding decl if input is str.
            # SVG xml encoding must be utf-8 as per OT spec.
            doc.data.encode("utf-8"),
            parser=etree.XMLParser(
                # Disable libxml2 security restrictions to support very deep trees.
                # Without this we would get an error like this:
                # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`
                # when parsing big fonts e.g. noto-emoji-picosvg.ttf.
                huge_tree=True,
                # ignore blank text as it's not meaningful in OT-SVG; it also prevents
                # dangling tail text after removing an element when pretty_print=True
                remove_blank_text=True,
                # don't replace entities; we don't expect any in OT-SVG and they may
                # be abused for XXE attacks
                resolve_entities=False,
            ),
        )

        elements = group_elements_by_id(svg)
        gids = {rev_orig_glyph_map[g] for g in glyphs}
        element_ids = {f"glyph{i}" for i in gids}
        closure_element_ids(elements, element_ids)

        if not subset_elements(svg, element_ids):
            continue

        if not s.options.retain_gids:
            id_map = remap_glyph_ids(svg, glyph_index_map)
            update_glyph_href_links(svg, id_map)

        new_doc = etree.tostring(svg, pretty_print=s.options.pretty_svg).decode("utf-8")

        new_gids = (glyph_index_map[i] for i in gids)
        for start, end in ranges(new_gids):
            new_docs.append(SVGDocument(new_doc, start, end, doc.compressed))

    self.docList = new_docs

    return bool(self.docList)
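

# Usage sketch (illustrative only; file names are hypothetical, not part of this
# module): subset_glyphs() above is registered on the 'SVG ' table class and is
# invoked by the generic fontTools subsetter whenever the font being subset has
# an 'SVG ' table, e.g.:
#
#     from fontTools import subset
#
#     options = subset.Options()
#     font = subset.load_font("ColorFont.ttf", options)
#     subsetter = subset.Subsetter(options)
#     subsetter.populate(text="AB")
#     subsetter.subset(font)
#     subset.save_font(font, "ColorFont-subset.ttf", options)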