254 lines
9.2 KiB
Python
254 lines
9.2 KiB
Python
from __future__ import annotations
|
|
|
|
import re
|
|
from functools import lru_cache
|
|
from itertools import chain, count
|
|
from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple
|
|
|
|
try:
|
|
from lxml import etree
|
|
except ImportError:
|
|
# lxml is required for subsetting SVG, but we prefer to delay the import error
|
|
# until subset_glyphs() is called (i.e. if font to subset has an 'SVG ' table)
|
|
etree = None
|
|
|
|
from fontTools import ttLib
|
|
from fontTools.subset.util import _add_method
|
|
from fontTools.ttLib.tables.S_V_G_ import SVGDocument
|
|
|
|
|
|
__all__ = ["subset_glyphs"]
|
|
|
|
|
|
# Matches element ids of the form "glyph<digits>", capturing the digits.
GID_RE = re.compile(r"^glyph(\d+)$")

# Prefix -> namespace URI bindings handed to every compiled XPath expression.
NAMESPACES = {
    "svg": "http://www.w3.org/2000/svg",
    "xlink": "http://www.w3.org/1999/xlink",
}

# Name of the xlink:href attribute in "{namespace-uri}localname" form.
XLINK_HREF = f'{{{NAMESPACES["xlink"]}}}href'
|
|
|
|
|
|
# TODO(anthrotype): Replace with functools.cache once we are 3.9+
|
|
@lru_cache(maxsize=None)
def xpath(path):
    """Return a compiled XPath evaluator for the expression ``path``.

    The expression is compiled with the module's NAMESPACES prefix bindings.
    Results are memoised per expression string, so the same compiled object
    is reused every time the expression is applied to another element.
    """
    compiled = etree.XPath(path, namespaces=NAMESPACES)
    return compiled
|
|
|
|
|
|
def group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]:
    """Map the value of every 'id' attribute found in ``tree`` to its element.

    If the same id occurs more than once, the last matching element wins.
    """
    # The '//' query picks up every svg element carrying an 'id', wherever it
    # sits in the document, the root element included:
    # https://github.com/fonttools/fonttools/issues/2548
    find_elements_with_id = xpath("//svg:*[@id]")
    by_id = {}
    for node in find_elements_with_id(tree):
        by_id[node.attrib["id"]] = node
    return by_id
|
|
|
|
|
|
def parse_css_declarations(style_attr: str) -> Dict[str, str]:
    """Parse the content of an SVG ``style`` attribute into a property map.

    The string is split into declarations on ';'. Each declaration is split
    into a property name and a value on its *first* ':' only, so values that
    themselves contain a colon (e.g. ``url(http://...)`` or quoted strings)
    are kept whole. Surrounding whitespace is stripped from both name and
    value; blank declarations (such as the one after a trailing ';') are
    ignored. When a property is repeated, the last value wins.

    Args:
        style_attr: the raw value of a ``style`` attribute.

    Returns:
        A dict mapping property names to their values.

    Raises:
        ValueError: if a non-blank declaration has no ':' separator.
    """
    # https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style
    # https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations
    result = {}
    for declaration in style_attr.split(";"):
        property_name, separator, value = declaration.partition(":")
        if separator:
            result[property_name.strip()] = value.strip()
        elif declaration.strip():
            raise ValueError(f"Invalid CSS declaration syntax: {declaration}")
    return result
|
|
|
|
|
|
def iter_referenced_ids(tree: etree.Element) -> Iterator[str]:
    """Yield the ids referenced by ``tree`` itself or by its SVG descendants.

    Two kinds of local reference are recognised:

    * ``xlink:href="#id"`` (as used by <use> and gradient templates);
    * ``url(#id)`` values of ``fill`` or ``clip-path``, given either as
      attributes or as declarations inside a ``style`` attribute (a ``style``
      declaration takes precedence over the attribute of the same name).

    The same id may be yielded more than once, and no check is made that the
    id exists in the document. Empty references such as ``url(#)`` are skipped.

    Raises:
        ValueError: propagated from parse_css_declarations() when an inspected
            ``style`` attribute cannot be parsed.
    """
    # TODO(anthrotype): Check we aren't missing other supported kinds of reference
    find_svg_elements_with_references = xpath(
        ".//svg:*[ "
        "starts-with(@xlink:href, '#') "
        "or starts-with(@fill, 'url(#') "
        "or starts-with(@clip-path, 'url(#') "
        # Look for 'url(#' anywhere in the style: whitespace is allowed between
        # the ':' and the value, so requiring ':url(#' would miss 'fill: url(#x)'.
        "or contains(@style, 'url(#') "
        "]",
    )
    # The query only matches descendants, so the element itself is chained in.
    for el in chain([tree], find_svg_elements_with_references(tree)):
        ref_id = href_local_target(el)
        if ref_id is not None:
            yield ref_id

        attrs = el.attrib
        if "style" in attrs:
            attrs = {**dict(attrs), **parse_css_declarations(el.attrib["style"])}
        for attr in ("fill", "clip-path"):
            value = attrs.get(attr)
            if value is None:
                continue
            if value.startswith("url(#") and value.endswith(")"):
                ref_id = value[5:-1]  # strip the leading 'url(#' and trailing ')'
                # An empty target cannot name an element: ignore it rather than
                # fail; validating the svg is not this function's job.
                if ref_id:
                    yield ref_id
|
|
|
|
|
|
def closure_element_ids(
    elements: Dict[str, etree.Element], element_ids: Set[str]
) -> None:
    """Grow ``element_ids`` in place to its closure under id references.

    Starting from the initial ids, repeatedly add every id referenced by the
    elements found in the previous round, until a round finds nothing new.
    Ids with no entry in ``elements`` are dangling references and are skipped;
    validating the svg is not our job.
    """
    frontier = element_ids
    while frontier:
        # ids referenced from this round's elements that are not yet collected
        newly_found: Set[str] = {
            ref_id
            for el_id in frontier
            if el_id in elements
            for ref_id in iter_referenced_ids(elements[el_id])
            if ref_id not in element_ids
        }
        element_ids.update(newly_found)
        # only the new ids need to be followed in the next round
        frontier = newly_found
|
|
|
|
|
|
def subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool:
    """Prune from ``el`` every subtree that contains no retained id.

    An element whose id is in ``retained_ids`` is kept together with its whole
    subtree. Any other element is kept only if at least one of its children is
    kept; otherwise it is detached from its parent (if it has one).

    Returns:
        True if ``el`` was kept, False if it was dropped.
    """
    if el.attrib.get("id") in retained_ids:
        # the id is in the subset: keep everything below without recursing
        return True

    # Every child must be visited, because the recursive call has the side
    # effect of removing dropped elements; so no short-circuiting here.
    any_child_kept = False
    for child in el:
        if subset_elements(child, retained_ids):
            any_child_kept = True
    if any_child_kept:
        return True

    # all the children (if any) removed themselves in the loop above
    assert len(el) == 0
    parent = el.getparent()
    if parent is not None:
        parent.remove(el)
    return False
|
|
|
|
|
|
def remap_glyph_ids(
    svg: etree.Element, glyph_index_map: Dict[int, int]
) -> Dict[str, str]:
    """Rename the id="glyph{gid}" elements of ``svg`` to follow new glyph indices.

    Args:
        svg: root of the document whose element ids are rewritten in place.
        glyph_index_map: mapping of {old_gid: new_gid}.

    Returns:
        A dict mapping each id that was changed to its replacement. Ids that
        are not of the form "glyph{gid}", or whose index stays the same, are
        left alone and do not appear in the result.
    """
    elements = group_elements_by_id(svg)
    id_map = {}
    for old_id, node in elements.items():
        match = GID_RE.match(old_id)
        if match is None:
            continue

        old_gid = int(match.group(1))
        new_gid = glyph_index_map.get(old_gid)
        if new_gid == old_gid:
            # index unchanged: nothing to rename
            continue

        if new_gid is None:
            # The old index is missing from the map: the element corresponds
            # to a glyph that was excluded from the font's subset.
            # Rename it to avoid clashes with the new GIDs or other element ids.
            new_id = f".{old_id}"
            for suffix in count(1):
                if new_id not in elements:
                    break
                new_id = f"{new_id}.{suffix}"
        else:
            new_id = f"glyph{new_gid}"

        id_map[old_id] = new_id
        node.attrib["id"] = new_id

    return id_map
|
|
|
|
|
|
def href_local_target(el: etree.Element) -> Optional[str]:
    """Return the id targeted by a local ``xlink:href="#id"`` on ``el``.

    Returns None when the element has no xlink:href attribute, or when its
    value is not a '#' followed by at least one character.
    """
    href = el.attrib.get(XLINK_HREF)
    if href is None:
        return None
    if not href.startswith("#") or len(href) <= 1:
        return None
    # drop the leading '#'
    return href[1:]
|
|
|
|
|
|
def update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None:
    """Repoint xlink:href="#glyph..." attributes below ``svg`` to renamed ids.

    Args:
        svg: element whose SVG descendants are updated in place.
        id_map: mapping of {old_id: new_id}; links whose target is not a key
            of this mapping are left untouched.
    """
    find_glyph_links = xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")
    for link in find_glyph_links(svg):
        target = href_local_target(link)
        # the query only selects hrefs that start with '#glyph'
        assert target is not None
        if target not in id_map:
            continue
        link.attrib[XLINK_HREF] = f"#{id_map[target]}"
|
|
|
|
|
|
def ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]:
    """Yield sorted, non-overlapping (min, max) runs of consecutive integers.

    Duplicates in ``ints`` are ignored and both bounds are inclusive, so an
    isolated value ``v`` comes out as ``(v, v)``. Nothing is yielded for an
    empty input.
    """
    ordered = sorted(set(ints))
    if not ordered:
        return

    lo = hi = ordered[0]
    for value in ordered[1:]:
        if value == hi + 1:
            # still inside the current run: extend it
            hi = value
            continue
        # gap found: emit the finished run and open a new one
        yield (lo, hi)
        lo = hi = value
    yield (lo, hi)
|
|
|
|
|
|
@_add_method(ttLib.getTableClass("SVG "))
def subset_glyphs(self, s) -> bool:
    """Subset the documents of this 'SVG ' table to the glyphs retained by ``s``.

    Each document in ``self.docList`` is parsed, pruned down to the elements
    needed by the retained glyphs (plus whatever those reference) and, unless
    ``s.options.retain_gids`` is set, has its "glyph{gid}" ids renumbered to
    the new glyph indices. Documents covering no retained glyph are dropped.
    A document whose retained glyphs end up with non-contiguous new indices
    is emitted once per contiguous range.

    Args:
        s: the subsetter; this method reads ``orig_glyph_order``,
            ``reverseOrigGlyphMap``, ``glyph_index_map``, ``glyphs`` and
            ``options`` from it.

    Returns:
        True if at least one document is left in the table.

    Raises:
        ImportError: if lxml is not installed.
    """
    if etree is None:
        raise ImportError("No module named 'lxml', required to subset SVG")

    # glyph names (before subsetting)
    glyph_order: List[str] = s.orig_glyph_order
    # glyph name -> original glyph index
    rev_orig_glyph_map: Dict[str, int] = s.reverseOrigGlyphMap
    # original glyph index -> new glyph index (after subsetting)
    glyph_index_map: Dict[int, int] = s.glyph_index_map

    kept_docs: List[SVGDocument] = []
    for doc in self.docList:
        doc_glyph_names = {
            glyph_order[gid] for gid in range(doc.startGlyphID, doc.endGlyphID + 1)
        }
        kept_names = doc_glyph_names.intersection(s.glyphs)
        if not kept_names:
            # no intersection: the whole record can be dropped
            continue

        # Encode because fromstring dislikes an xml encoding declaration when
        # the input is str. SVG xml encoding must be utf-8 as per the OT spec.
        xml_bytes = doc.data.encode("utf-8")
        parser = etree.XMLParser(
            # Disable libxml2 security restrictions to support very deep trees.
            # Without this we would get an error like this:
            # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`
            # when parsing big fonts e.g. noto-emoji-picosvg.ttf.
            huge_tree=True,
            # Blank text is not meaningful in OT-SVG; ignoring it also prevents
            # dangling tail text after removing an element when pretty_print=True.
            remove_blank_text=True,
            # Don't replace entities; we don't expect any in OT-SVG and they
            # may be abused for XXE attacks.
            resolve_entities=False,
        )
        root = etree.fromstring(xml_bytes, parser=parser)

        elements = group_elements_by_id(root)
        old_gids = {rev_orig_glyph_map[name] for name in kept_names}
        wanted_ids = {f"glyph{gid}" for gid in old_gids}
        # also keep whatever the wanted elements reference
        closure_element_ids(elements, wanted_ids)

        if not subset_elements(root, wanted_ids):
            continue

        if not s.options.retain_gids:
            renamed = remap_glyph_ids(root, glyph_index_map)
            update_glyph_href_links(root, renamed)

        serialized = etree.tostring(root, pretty_print=s.options.pretty_svg)
        new_doc = serialized.decode("utf-8")

        # one record per contiguous run of new glyph indices
        new_gids = (glyph_index_map[gid] for gid in old_gids)
        for first, last in ranges(new_gids):
            kept_docs.append(SVGDocument(new_doc, first, last, doc.compressed))

    self.docList = kept_docs

    return bool(self.docList)
|