"""
This :mod:`cellmlmanip.parser` module contains the CellML parser and related classes. It reads a CellML model and
stores model information in the :class:`cellmlmanip.model.Model` class. MathML equations are translated to Sympy. RDF
is handled by RDFLib.
"""
import itertools
import logging
import os
from collections import deque
from enum import Enum
import sympy
from lxml import etree
from cellmlmanip.model import SYMPY_SYMBOL_DELIMITER, Model
logger = logging.getLogger(__name__)
UNIT_PREFIXES = {
'yocto': 1e-24,
'zepto': 1e-21,
'atto': 1e-18,
'femto': 1e-15,
'pico': 1e-12,
'nano': 1e-9,
'micro': 1e-6,
'milli': 1e-3,
'centi': 1e-2,
'deci': 1e-1,
'deca': 1e+1,
'deka': 1e+1,
'hecto': 1e2,
'kilo': 1e3,
'mega': 1e6,
'giga': 1e9,
'tera': 1e12,
'peta': 1e15,
'exa': 1e18,
'zetta': 1e21,
'yotta': 1e24
}
# Work around for sympy issue dealing with relationals in equations that er passed to Piecewise
# see https://github.com/sympy/sympy/issues/24086
# and https://github.com/ModellingWebLab/cellmlmanip/issues/350
sympy.Eq.is_Boolean = True
sympy.Ne.is_Boolean = True
sympy.Ge.is_Boolean = True
sympy.Le.is_Boolean = True
sympy.Gt.is_Boolean = True
sympy.Lt.is_Boolean = True
[docs]
class XmlNs(Enum):
"""Namespaces in CellML documents"""
CELLML = 'http://www.cellml.org/cellml/1.0#'
CMETA = 'http://www.cellml.org/metadata/1.0#'
MATHML = 'http://www.w3.org/1998/Math/MathML'
RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
[docs]
def with_ns(ns_enum, name):
"""Returns an ElementTree-friendly name with namespace in brackets"""
return '{%s}%s' % (ns_enum.value, name)
def _dump_node(node):
"""Pretty-print an XML node."""
return etree.tostring(node, pretty_print=True).decode()
class _Component:
"""This hold information about a CellML component. It's for internal-use only. Once the parser
has created the flattened cellmlmanip.Model instance, components are no longer used"""
def __init__(self, name):
self.name = name
self.parent = None
self.siblings = set()
self.encapsulated = set()
def set_parent(self, parent_name):
"""Sets the parent of this component"""
if self.parent:
raise ValueError('Parent of component %s already %s. Cannot set %s: multiple parents not allowed!' %
(self.name, self.parent, parent_name))
self.parent = parent_name
def add_sibling(self, sibling_name):
"""Adds a sibling for this component"""
assert sibling_name not in self.siblings, 'Sibling component %s already added!' % sibling_name
self.siblings.add(sibling_name)
def add_encapsulated(self, encapsulated_name):
"""Adds an encapsulated component to this component"""
if encapsulated_name in self.encapsulated:
raise ValueError('Encapsulated component %s already added!' % encapsulated_name)
self.encapsulated.add(encapsulated_name)
[docs]
class Parser(object):
"""Handles parsing of CellML files"""
def __init__(self, filepath):
"""Initialise an instance of Parser
:param filepath: the full filepath to the CellML model file
"""
self.filepath = filepath
# A :class:`Model` object or None
self.model = None
# A dictionary mapping component names to _Component objects
self.components = {}
[docs]
def parse(self, unit_store=None):
"""
The main method that reads the XML file and extracts the relevant parts of the CellML model
definition.
:param unit_store: Optional :class:`cellmlmanip.units.UnitStore` instance; if given the model will share the
underlying registry so that conversions between model units and those from the provided store work.
:return: a :class:`Model` holding CellML model definition, reading for manipulation.
"""
# Create lxml parser
parser = etree.XMLParser(no_network=True)
# Parse, get ElementTree
tree = etree.parse(self.filepath, parser)
# Validate CellML syntax
self._validate(parser, tree)
# <model> root node - initialise the model object
model_xml = tree.getroot()
# Raise error if component units are defined
units_in_comp_xpath = with_ns(XmlNs.CELLML, 'component') + '/' + with_ns(XmlNs.CELLML, 'units')
units_in_components = model_xml.findall(units_in_comp_xpath)
if len(units_in_components) != 0:
msg = 'Defining units inside components is not supported (found in component'
msg += 's ' if len(units_in_components) > 1 else ' '
msg += ', '.join([e.getparent().get('name') for e in units_in_components]) + ').'
raise ValueError(msg)
self.model = Model(model_xml.get('name'),
model_xml.get(with_ns(XmlNs.CMETA, 'id')),
unit_store=unit_store)
# handle the child elements of <model>
self._add_units(model_xml)
self._add_rdf(model_xml)
component_variables = self._add_components(model_xml)
self._add_relationships(model_xml)
connected_variable_mapping = self._add_connections(model_xml)
self._add_maths(component_variables, connected_variable_mapping)
# Canonicalise representation
self.transform_constants()
return self.model
@staticmethod
def _get_variable_name(component_name, variable_name):
return component_name + SYMPY_SYMBOL_DELIMITER + variable_name
def _add_rdf(self, element):
"""
Finds all ``<RDF>`` definitions under ``<element>`` and adds them to the model.
:param element: the CellML parent element to search for children RDF tags
"""
for rdf in element.iter(with_ns(XmlNs.RDF, 'RDF')):
self.model.add_rdf(etree.tostring(rdf, encoding=str))
def _add_units(self, model):
"""
<model> <units> <unit /> </units> </model>
:param model: an etree.Element
"""
units_elements = model.findall(with_ns(XmlNs.CELLML, 'units'))
# get list of built-in cellml units
from cellmlmanip.units import _CELLML_UNITS
units_found = set(_CELLML_UNITS)
# get all the units defined in the cellml model
definitions_to_add = deque()
for units_element in units_elements:
units_name = units_element.get('name')
# if it's a defined base unit, we can be add immediately to the model
if units_element.get('base_units'):
self.model.units.add_base_unit(units_name)
units_found.add(units_name)
# all other units are collected (because they may depend on further user-defined units)
else:
unit_elements = [t.attrib for t in units_element.getchildren()]
definitions_to_add.append((units_name, unit_elements))
iteration = 0
# while we still have units to add
while definitions_to_add:
# get a definition from the top of the list
unit_name, unit_elements = definitions_to_add.pop()
# check whether this unit is defined in terms of units that we know about
add_now = True
for unit in unit_elements:
# if defined in terms of units we don't know about
if unit['units'] not in units_found:
# defer adding this units - add it back to the end of the list
definitions_to_add.appendleft((unit_name, unit_elements))
add_now = False
break
# unit is defined in terms of known units - ok to add to model
if add_now:
definition = self._make_pint_unit_definition(unit_name, unit_elements)
if self.model.units.is_defined(unit_name):
raise ValueError(f'Duplicate unit definition {unit_name}, unit names need to be unique!')
self.model.units.add_unit(unit_name, definition)
units_found.add(unit_name)
iteration = 0
else:
# we did not add any units in this iteration - make note
iteration += 1
# exit if we have not been able to add any units in the entire list of definitions
if iteration > len(definitions_to_add):
raise ValueError('Cannot create units %s. Cycles or unknown units.' % definitions_to_add)
def _make_pint_unit_definition(self, units_name, unit_attributes):
"""
Construct and return a ``pint.UnitDefinition``.
:param units_name: The unit name
:param unit_attributes: A list of dicts, where each dict contains the fields
``{'multiplier': float, 'units': string, 'exponent': integer, 'prefix': string/integer}``.
Not all fields are necessary but ``units`` must match a unit in Pint registry.
"""
full_unit_expr = []
# For each of the <unit> elements for this unit definition
for unit_element in unit_attributes:
# Start from the unit name
expr = unit_element['units']
# See https://www.cellml.org/specifications/cellml_1.1/#sec_units 5.2.2
# offset, prefix, exponent, and multiplier
if 'prefix' in unit_element:
try:
power = UNIT_PREFIXES[unit_element['prefix']]
except KeyError:
# Assume that prefix is an integer.
power = '1e%s' % unit_element['prefix']
expr = '(%s * %s)' % (expr, power)
if 'exponent' in unit_element:
expr = '((%s)**%s)' % (expr, unit_element['exponent'])
if 'multiplier' in unit_element:
expr = '(%s * %s)' % (unit_element['multiplier'], expr)
if 'offset' in unit_element and (not unit_element['offset'].strip().isnumeric() or
int(unit_element['offset']) != 0):
raise ValueError('Offsets in units are not supported!')
# Collect/add this particular <unit> definition
full_unit_expr.append(expr)
# Join together all the parts of the unit expression and return
return '*'.join(full_unit_expr)
def _add_components(self, model):
"""
<model> <component> </model>
:param model: an etree.Element
:return: a list of (element, variable_to_symbol) tuples with maths to be added later.
"""
component_elements = model.findall(with_ns(XmlNs.CELLML, 'component'))
component_variables = []
# for each component defined in the model
for element in component_elements:
# component are only kept in parser to resolve relationships and connections
name = element.get('name')
if name in self.components:
raise ValueError(f'Duplicate component name {name}, component names must be unique!')
self.components[name] = _Component(name)
# process the <variable> tags in this component
variable_to_symbol = self._add_variables(element)
# to speed up parsing, we store the maths to add to perform adding later when we know how variables connect
component_variables.append((element, variable_to_symbol))
# Raise error if reactions are defined
reactions = element.findall(with_ns(XmlNs.CELLML, 'reaction'))
if reactions:
raise ValueError(
'Reactions are not supported (found in component ' + name + ').')
return component_variables
def _add_variables(self, component_element):
"""
<model> <component> <variable> </component> </model>
:param component_element: an etree.Element
"""
variable_elements = component_element.findall(with_ns(XmlNs.CELLML, 'variable'))
# we keep a {variable name: sympy symbol} lookup that we pass to the transpiler
variable_lookup_symbol = {}
for variable_element in variable_elements:
attributes = dict(variable_element.attrib)
# Rename key for cmeta_id (remove namespace from attribute)
cmeta_id_attribute = with_ns(XmlNs.CMETA, 'id')
if cmeta_id_attribute in attributes:
attributes['cmeta_id'] = attributes.pop(cmeta_id_attribute)
# mangle the name by prefixing with the component name
attributes['name'] = Parser._get_variable_name(component_element.get('name'),
attributes['name'])
# look up units
attributes['units'] = self.model.units.get_unit(attributes['units'])
# model.add_variable() returns sympy dummy created for this variable - keep it
variable_lookup_symbol[attributes['name']] = self.model.add_variable(**attributes)
return variable_lookup_symbol
def _add_maths(self, component_variables, connected_variable_mapping):
"""
Add maths for all elements stored in component_variables.
<model> <component> <math> </component> </model>
:param component_variables: a list of (component_element, variable_to_symbol) tuples
where ``component_element`` is an ``etree.Element`
and ``variable_to_symbol`` is a ``Dict[str, sympy.Dummy]``
with the key the variable name and the value the variable in the model
:param connected_variable_mapping: a ``Dict[str, sympy.Dummy]`` mapping a connected variable to its source.
"""
for component_element, variable_to_symbol in component_variables:
# get all <math> elements in the component
math_elements = component_element.findall(with_ns(XmlNs.MATHML, 'math'))
# nothing to do if we don't have any <math> elements
if math_elements:
# Method to create symbols
prefix = component_element.get('name') + SYMPY_SYMBOL_DELIMITER
def symbol_generator(identifer):
symbol_name = prefix + identifer
out = variable_to_symbol.get(symbol_name, None)
while str(out) in connected_variable_mapping:
out = connected_variable_mapping[str(out)]
assert out is not None, '%s not found in symbol dict' % (prefix + identifer)
return out
# Set up transpiler to generate correct symbol names for connected variables in one go
# without the need for post-processing
transpiler = Transpiler(
symbol_generator=symbol_generator,
number_generator=lambda x, y: self.model.create_quantity(x, self.model.units.get_unit(y)),
)
# for each math element
for math_element in math_elements:
sympy_exprs = transpiler.parse_tree(math_element)
# add each equation from <math> to the model
for expr in sympy_exprs:
self.model.add_equation(expr)
def _add_relationships(self, model: etree.Element):
group_elements = model.findall(with_ns(XmlNs.CELLML, 'group'))
# find all the <group> elements
for group_element in group_elements:
# find the relationship for this <group>
relationship_ref = group_element.findall(with_ns(XmlNs.CELLML, 'relationship_ref'))
if len(relationship_ref) != 1:
raise ValueError("Expecting exactly 1 relationship_ref tag per group, got %s!" % len(relationship_ref))
relationship = relationship_ref[0].attrib.get('relationship')
# we only handle 'encapsulation' relationships (i.e. ignoring 'containment')
if relationship == 'encapsulation':
self._handle_component_ref(group_element, None)
def _handle_component_ref(self, parent_tag, parent_component):
# we're going to process all the siblings at the end
siblings = []
# for each of the child <component_ref> elements in the parent tag
for component_ref_element in parent_tag.findall(with_ns(XmlNs.CELLML, 'component_ref')):
# get the name of the child component
child_component = component_ref_element.attrib.get('component')
# add it to the sibling list
siblings.append(child_component)
# if we have a parent component for this child component (i.e. not top-level anonymous)
if parent_component:
# add the relationship in the component
self.components[parent_component].add_encapsulated(child_component)
self.components[child_component].set_parent(parent_component)
# descend into this <component_ref> tag to handle any children
self._handle_component_ref(component_ref_element, child_component)
# if there are siblings in this non-anonymous group
if parent_component and len(siblings) > 1:
# register each of the siblings with each other
for component_a, component_b in itertools.product(siblings, siblings):
if component_a != component_b:
self.components[component_a].add_sibling(component_b)
def _add_connections(self, model):
"""
:param model: an etree.Element
:return: dict mapping a connected variable to its source variable.
"""
connected_variable_mapping = {}
connection_elements = model.findall(with_ns(XmlNs.CELLML, 'connection'))
# a list to collect the (source, target) connection tuples
connections_to_process = deque()
# for each connection in the model
for connection in connection_elements:
# Should have one map_components and at least one map_variables. RELAXNGV makes use of this
map_components = connection.find(with_ns(XmlNs.CELLML, 'map_components'))
comp_1, comp_2 = (map_components.attrib.get('component_1'),
map_components.attrib.get('component_2'))
if comp_1 not in self.components:
raise ValueError(f'Cannot connect components that do not exist: {comp_1}!')
if comp_2 not in self.components:
raise ValueError(f'Cannot connect components that do not exist: {comp_2}!')
# go through all tags
for child in connection.findall(with_ns(XmlNs.CELLML, 'map_variables')):
connections_to_process.append(
self._determine_connection_direction(comp_1, child.attrib.get('variable_1'),
comp_2, child.attrib.get('variable_2'))
)
# we add the connection to the model by first connecting
# those variables we know are source variables
# keep processing the list of connections until we've done them all
unchanged_loop_count = 0
while connections_to_process:
# get a connection
connection = connections_to_process.popleft()
# if we did not successfully connect this variable (because, e.g., we don't know the
# source of *this* source)
source, target = connection
# Target should not already be assigned to
if target.assigned_to:
raise ValueError('Target already assigned to %s before assignment to %s' %
(target.assigned_to, source.assigned_to))
if not source.assigned_to:
# add it back to the list
connections_to_process.append(connection)
unchanged_loop_count += 1
else:
connected_variable_mapping[target.name] = source # Store connection for later when maths is added
# Check whether we need a unit conversion
cf = self.model.units.get_conversion_factor(from_unit=source.units, to_unit=target.units)
if cf == 1:
target.assigned_to = source.assigned_to # Direct substitution is possible
if target.cmeta_id is not None: # In case annotation is on target instead of source
self.model.transfer_cmeta_id(source=target, target=source)
else:
cf_quant = self.model.create_quantity(cf, target.units / source.units) # conversion factor quant
self.model.add_equation(sympy.Eq(target, source.assigned_to * cf_quant)) # Add connecting equation
target.assigned_to = target # The assigned variable for this variable is itself
unchanged_loop_count = 0
assert unchanged_loop_count <= len(connections_to_process), 'Unable to add connections to the model'
return connected_variable_mapping
def _determine_connection_direction(self, comp_1, var_1, comp_2, var_2):
"""Takes a CellML connection and attempts to resolve the connect by assigning the target
variable to the assigned source variable
Relevant lines from the CellML specification:
The set of all components immediately encapsulated by the current
component is the encapsulated set.
Other components encapsulated by the same parent make up the
sibling set.
The interface exposed to the parent component and components in
the sibling set is defined by the public_interface attribute. The
private_interface attribute defines the interface exposed to
components in the encapsulated set. Each interface has three possible
values: "in", "out", and "none", where "none" indicates the absence
of an interface.
"""
def _are_siblings(comp_a, comp_b):
return self.components[comp_a].parent == self.components[comp_b].parent
def _parent_of(parent_name, child_name):
return parent_name == self.components[child_name].parent
# get the variable information from the model about each end of the connection
variable_1 = self.model.get_variable_by_name(self._get_variable_name(comp_1, var_1))
variable_2 = self.model.get_variable_by_name(self._get_variable_name(comp_2, var_2))
# if the components are siblings (either same parent or top-level)
if _are_siblings(comp_1, comp_2):
# they are both connected on their public_interface
# Validation makese sure the public_interface are in/out
# and if they are equal it would trigger an error at the connection stage (Target already assigned)
if variable_1.public_interface == 'out':
return variable_1, variable_2
else:
return variable_2, variable_1
else:
# determine which component is parent of the other
if _parent_of(comp_1, comp_2):
parent_var, child_var = variable_1, variable_2
else:
parent_var, child_var = variable_2, variable_1
# parent/child components are connected using private/public interface, respectively
if child_var.public_interface == 'in' and parent_var.private_interface == 'out':
return parent_var, child_var
elif child_var.public_interface == 'out' and parent_var.private_interface == 'in':
return child_var, parent_var
raise ValueError('Cannot determine the source & target for connection (%s, %s) - (%s, %s)' %
(comp_1, var_1, comp_2, var_2))
def _validate(self, parser, tree):
"""
Validates the given lxml ``tree`` against the CellML 1.0 RELAX NG schema.
:param parser: An `lxml.etree.XMLParser`
:param tree: An `lxml.etree.ElementTree` made with the given parser.
"""
# Create RelaxNG object
path = os.path.join(os.path.dirname(__file__), 'data', 'cellml_1_0.rng')
rnc = etree.RelaxNG(etree.parse(path, parser))
# Validate
if not rnc.validate(tree):
msg = '. '.join([str(x) for x in rnc.error_log])
raise ValueError('Invalid or unsupported CellML file. ' + msg)
[docs]
class Transpiler(object):
"""
Handles conversion of MathmL to Sympy exprerssions.
:param symbol_generator: An optional method to create expressions for symbols.
Must have signature ``f(name) -> sympy.Basic``.
:param number_generator: An optional method to create expressions for numbers with units.
Must have signature ``f(value, unit) -> sympy.Basic``.
"""
def __init__(self, symbol_generator=None, number_generator=None):
# Create simple lambdas for symbol and number generators
if symbol_generator is None:
symbol_generator = lambda x: sympy.Symbol(x) # noqa: E731
if number_generator is None:
number_generator = lambda x, y: sympy.Float(x) # noqa: E731
# Store symbol and number generators
self.symbol_generator = symbol_generator
self.number_generator = number_generator
# Mapping MathML tag element names (keys) to appropriate handler for SymPy output (values)
# These tags require explicit handling because they have children or context etc.
self.handlers = {
'apply': self._apply_handler,
'bvar': self._bvar_handler,
'ci': self._ci_handler,
'cn': self._cn_handler,
'degree': self._degree_handler,
'diff': self._diff_handler,
'divide': self._divide_handler,
'log': self._log_handler,
'logbase': self._logbase_handler,
'math': self.transpile,
'minus': self._minus_handler,
'otherwise': self._otherwise_handler,
'piece': self._piece_handler,
'piecewise': self._piecewise_handler,
'power': self._power_handler,
'root': self._root_handler
}
# Add tags that can be handled by simple_operator_handler
for tag_name in SIMPLE_MATHML_TO_SYMPY_CLASSES:
self.handlers[tag_name] = self._simple_operator_handler
[docs]
@staticmethod
def set_mathml_handler(mathml_operator, operator_class):
"""Change how the transpiler handles a given mathml_operator.
:param mathml_operator: The name of a MathML operator e.g. 'exp', 'true' etc.
:param operator_class: A class that can handle the given operator e.g. ``sympy.exp``, or a function that creates
and returns a sympy object given the operands as arguments.
"""
SIMPLE_MATHML_TO_SYMPY_CLASSES[mathml_operator] = operator_class
[docs]
def parse_string(self, xml_string):
"""
Reads MathML content from a string and returns equivalent SymPy expressions.
:return: A list of SymPy expressions.
"""
parser = etree.XMLParser(no_network=True)
tree = etree.fromstring(xml_string, parser)
return self.parse_tree(tree)
[docs]
def parse_tree(self, math_element):
"""Accepts a <math> element and returns equivalent SymPy expressions.
Note: math_element must be the <math> ``Element``, not the root ``ElementTree``.
:param math_element: <math> ``etree.Element`` object
:return: A list of SymPy expressions.
"""
return self.transpile(math_element)
[docs]
def transpile(self, element):
"""Convert MathML to Sympy expressions.
Descends the given MathML element node, calling the corresponding handler for child
elements, and returns the appropriate SymPy expression.
:param element: an etree ``Element`` of parsed MathML
:return: a list of SymPy expressions
"""
# Collect the parsed expression(s) (i.e. SymPy output) into list
sympy_expressions = []
# For each child element of this element
for child_element in element.iterchildren(tag='*'):
# Call the appropriate MathML handler function for this tag
tag_name = etree.QName(child_element.tag).localname
if tag_name in self.handlers:
sympy_expressions.append(self.handlers[tag_name](child_element))
else:
# MathML handler function not found for this tag!
raise ValueError('No handler for element <%s>' % tag_name)
return sympy_expressions
# TOKEN ELEMENTS ###############################################################################
def _ci_handler(self, node):
"""MathML: https://www.w3.org/TR/MathML2/chapter4.html#contm.ci
SymPy: http://docs.sympy.org/latest/modules/core.html#id17
"""
identifier = node.text.strip()
return self.symbol_generator(identifier)
def _cn_handler(self, node):
"""MathML: https://www.w3.org/TR/MathML2/chapter4.html#contm.cn
SymPy: http://docs.sympy.org/latest/modules/core.html#number
"""
# If this number is using scientific notation
if 'type' in node.attrib:
if node.attrib['type'] == 'e-notation':
# A real number may also be presented in scientific notation. Such numbers have two
# parts (a mantissa and an exponent) separated by sep. The first part is a real
# number, while the second part is an integer exponent indicating a power of the
# base.. For example, 12.3<sep/>5 represents 12.3 times 10^5. The default
# presentation of this example is 12.3e5.
if len(node) == 1 and node[0].tag == with_ns(XmlNs.MATHML, 'sep'):
mantissa = node.text.strip()
exponent = int(node[0].tail.strip())
number = float('%se%d' % (mantissa, exponent))
else:
raise ValueError('Expecting '
'<cn type="e-notation">significand<sep/>exponent</cn>.'
'Got: ' + _dump_node(node))
else:
raise ValueError('Unimplemented type attribute for <cn>: ' + node.attrib['type'])
else:
number = float(node.text.strip())
# Get units, if given
# TODO: We're allowing these to _not_ be set for testing only. Maybe remove this option?
units = node.get(with_ns(XmlNs.CELLML, 'units'))
return self.number_generator(number, units)
# BASIC CONTENT ELEMENTS #######################################################################
def _apply_handler(self, node):
"""https://www.w3.org/TR/MathML2/chapter4.html#contm.apply
"""
result = self.transpile(node)
if len(result) > 1:
expression = result[0](*(result[1:]))
else:
expression = result[0]
return expression
def _piecewise_handler(self, node):
"""MathML: https://www.w3.org/TR/MathML2/chapter4.html#contm.piecewise
SymPy: http://docs.sympy.org/latest/modules/functions/elementary.html#piecewise
constructor, zero or more <piece>, zero or one <otherwise>
"""
result = self.transpile(node)
return sympy.Piecewise(*result)
def _piece_handler(self, node):
"""MathML: https://www.w3.org/TR/MathML2/chapter4.html#contm.piecewise
Returns a 2-tuple defining an expression and condition
<piece> element contains exactly two children
"""
result = self.transpile(node)
if len(result) != 2:
raise ValueError('Need exactly 2 children for <piece>')
return result[0], result[1]
def _otherwise_handler(self, node):
"""MathML: https://www.w3.org/TR/MathML2/chapter4.html#contm.piecewise
Returns a 2-tuple defining an expression and condition
"""
result = self.transpile(node)
if len(result) != 1:
raise ValueError('More than 1 child for <otherwise>')
return result[0], True
# ARITHMETIC, ALGEBRA AND LOGIC ################################################################
def _minus_handler(self, node):
"""https://www.w3.org/TR/MathML2/chapter4.html#contm.minus
unary arithmetic operator OR binary arithmetic operator
From http://docs.sympy.org/latest/tutorial/manipulation.html:
"There is no subtraction class in SymPy. x - y is represented as x + -y, or, more
completely, x + -1*y, i.e., Add(x, Mul(-1, y))."
* Negation (-a) is equivalent to sympy.Mul(sympy.S.NegativeOne, a)
* Subtraction (a - b) is equivalent to sympy.Add(a, sympy.Mul(sympy.S.NegativeOne, b))
"""
def _wrapped_minus(left_operand, right_operand=None):
if right_operand is None:
# unary arithmetic operator => negation
return -left_operand
# otherwise, binary arithmetic operator => subtraction
return left_operand - right_operand
return _wrapped_minus
def _divide_handler(self, node):
"""https://www.w3.org/TR/MathML2/chapter4.html#contm.divide
binary arithmetic operator
There is no class in SymPy for division. Rather, division is represented by a power of -1.
Equivalent to sympy.Mul(a, sympy.Pow(b, sympy.S.NegativeOne))
"""
def _wrapped_divide(dividend, divisor):
return dividend / divisor
return _wrapped_divide
def _power_handler(self, node):
"""https://www.w3.org/TR/MathML2/chapter4.html#contm.power
binary arithmetic operator
equivalent to sympy.Pow(a, b)
"""
def _wrapped_power(base, exponent):
return base ** exponent
return _wrapped_power
def _root_handler(self, node):
"""https://www.w3.org/TR/MathML2/chapter4.html#contm.root
operator taking qualifiers
Nasty:
The root element is used to construct roots. The kind of root to be taken is specified by a
degree element, which should be given as the second child of the apply element enclosing the
root element. Thus, square roots correspond to the case where degree contains the value 2,
cube roots correspond to 3, and so on. If no degree is present, a default value of 2 is used
"""
def _wrapped_root(first_argument, second_argument=None):
# if no <degree> given, it's sqrt
if second_argument is None:
return sympy.root(first_argument, 2)
return sympy.root(second_argument, first_argument)
return _wrapped_root
def _degree_handler(self, node):
"""https://www.w3.org/TR/MathML2/chapter4.html#contm.degree
Meaning of <degree> depends on context! We implement it for order of <bvar> in <diff> and
the kind of root in <root>
"""
result = self.transpile(node)
if len(result) != 1:
raise ValueError('Expected single value in <degree> tag.'
'Got: ' + _dump_node(node))
return result[0]
# CALCULUS AND VECTOR CALCULUS #################################################################
def _diff_handler(self, node):
"""https://www.w3.org/TR/MathML2/chapter4.html#contm.diff
operator taking qualifiers
"""
def _wrapped_diff(x_symbol, y_symbol, evaluate=False):
if self._is_bool(x_symbol) or self._is_bool(y_symbol):
raise TypeError(f'Boolean not allowed in a Derivative: d{y_symbol} / d{x_symbol}')
# if bound variable element <bvar> contains <degree>, argument x_symbol is a list,
# otherwise, it is a symbol
if isinstance(x_symbol, list) and len(x_symbol) == 2:
bound_variable = x_symbol[0]
try:
order = int(x_symbol[1])
except TypeError:
raise TypeError(f'The degree of a derivative must be an int: d{y_symbol} / d{x_symbol}')
deriv = sympy.Derivative(y_symbol, bound_variable, order, evaluate=evaluate)
# Otherwise, first degree derivative
else:
deriv = sympy.Derivative(y_symbol, x_symbol, evaluate=evaluate)
return deriv
return _wrapped_diff
def _bvar_handler(self, node):
"""https://www.w3.org/TR/MathML2/chapter4.html#contm.bvar
NASTY: bvar element depends on the context it is being used
In a derivative, it indicates the variable with respect to which a function is being
differentiated.
The bound variable <bvar> can also specify degree. In this case, we'll have two elements
"""
result = self.transpile(node)
if len(result) == 1:
# Bound variable without specifying degree
return result[0]
elif len(result) == 2:
return result
else:
raise ValueError('Do not know how to handle <bvar> ' + _dump_node(node))
# ELEMENTARY CLASSICAL FUNCTIONS ###############################################################
def _log_handler(self, node):
"""https://www.w3.org/TR/MathML2/chapter4.html#contm.log
operator taking qualifiers or a unary calculus operator
"""
def _wrapped_log(first_element, second_element=None):
if second_element is None:
# if no <logbase> element is present, the base is assumed to be 10
return sympy.log(first_element, 10)
# Has <logbase> element, which is the first_element after <log/>
return sympy.log(second_element, first_element)
return _wrapped_log
def _logbase_handler(self, node):
"""Qualifier for <log>
The log function accepts only the logbase schema. If present, the logbase schema denotes the
base with respect to which the logarithm is being taken. Otherwise, the log is assumed to be
base 10. When used with log, the logbase schema is expected to contain a single child schema
otherwise an error is generated.
Should be the first element following log, i.e. the second child of the containing apply
element.
"""
return self.transpile(node)[0]
def _get_nary_relation_callback(self, sympy_relation):
"""Wraps the Sympy binary relation to handle n-ary MathML relations
:param sympy_relation: handle for binary Sympy relation (Eq, Le, Lt, Ge, Gt)
:return: callback used by the apply_handler to handle n-ary relations
"""
def _wrapper_relational(*expressions):
# If the MathML relation is chaining more than 2 expressions
if len(expressions) > 2:
# Convert to multiple Sympy binary relations bundled in an 'And' boolean
relations = []
for first, second in zip(expressions[:-1], expressions[1:]):
relations.append(sympy_relation(first, second))
return sympy.And(*relations)
if sympy_relation in (sympy.Ge, sympy.Le, sympy.Gt, sympy.Lt) and\
any(map(self._is_bool, expressions)):
raise TypeError(f'Boolean not allowed in inequality: {expressions[0]} '
f'{sympy_relation} {expressions[1]}')
if sympy_relation in (sympy.Eq, sympy.Ne) and any(map(self._is_bool, expressions))\
and not all(map(self._is_bool, expressions)):
if any(map(lambda e: isinstance(e, sympy.Derivative), expressions)):
raise TypeError(f'Boolean not allowed in inequality: {expressions[0]} '
f'{sympy_relation} {expressions[1]}')
else:
logger.warning(f'Boolean used in part of (in)equality equation is this intentional?: '
f'{expressions[0]} {sympy_relation} {expressions[1]}')
return sympy_relation(*expressions)
return _wrapper_relational
def _simple_operator_handler(self, node):
"""This function handles simple MathML <tagName> to sympy.Class operators, where no unique
handling of tag children etc. is required.
"""
tag_name = etree.QName(node.tag).localname
handler = SIMPLE_MATHML_TO_SYMPY_CLASSES[tag_name]
# Some MathML relations allow chaining but Sympy relations are binary operations
if tag_name in MATHML_NARY_RELATIONS:
return self._get_nary_relation_callback(handler)
return handler
def _is_bool(self, expr):
"""This function checks whether expr is a boolean (True/False).
"""
return isinstance(expr, (sympy.logic.boolalg.BooleanTrue, sympy.logic.boolalg.BooleanFalse))
# These MathML tags map directly to Sympy classes and don't require any extra handling
_SIMPLE_MATHML_TO_SYMPY_CLASSES = {
'abs': sympy.Abs,
'and': sympy.And,
'arccos': sympy.acos,
'arccosh': sympy.acosh,
'arccot': sympy.acot,
'arccoth': sympy.acoth,
'arccsc': sympy.acsc,
'arccsch': sympy.acsch,
'arcsec': sympy.asec,
'arcsech': sympy.asech,
'arcsin': sympy.asin,
'arcsinh': sympy.asinh,
'arctan': sympy.atan,
'arctanh': sympy.atanh,
'ceiling': sympy.ceiling,
'cos': sympy.cos,
'cosh': sympy.cosh,
'cot': sympy.cot,
'coth': sympy.coth,
'csc': sympy.csc,
'csch': sympy.csch,
'eq': sympy.Eq,
'exp': sympy.exp,
'exponentiale': sympy.E,
'false': sympy.false,
'floor': sympy.floor,
'geq': sympy.Ge,
'gt': sympy.Gt,
'infinity': sympy.oo,
'leq': sympy.Le,
'ln': sympy.ln,
'lt': sympy.Lt,
'max': sympy.Max,
'min': sympy.Min,
'neq': sympy.Ne,
'not': sympy.Not,
'notanumber': sympy.nan,
'or': sympy.Or,
'pi': sympy.pi,
'plus': sympy.Add,
'rem': sympy.Mod,
'sec': sympy.sec,
'sech': sympy.sech,
'sin': sympy.sin,
'sinh': sympy.sinh,
'tan': sympy.tan,
'tanh': sympy.tanh,
'times': sympy.Mul,
'true': sympy.true,
'xor': sympy.Xor,
}
# MathML tags, to SYmpy mappings that can be changes (copy to allow original mapping to be accesible)
SIMPLE_MATHML_TO_SYMPY_CLASSES = _SIMPLE_MATHML_TO_SYMPY_CLASSES.copy()
# MathML relation elements that are n-ary operators
MATHML_NARY_RELATIONS = {'eq', 'leq', 'lt', 'geq', 'gt'}