feat: add Canonical EXI encoding support for ISO 15118-2/-20 Plug & Charge

jayharper · jayharper · commit 07c6f08e7997 · 2026-03-18T11:14:11.000-05:00
Add opt-in Canonical EXI code generation (canonical_exi_enabled config flag)
for ISO 15118-2 and ISO 15118-20 Plug & Charge XML signature workflows.

When enabled, the generator produces a 2-mode runtime system:
- mode=0 (EXI_MODE_STANDARD): schema-informed standard EXI (0x80 header, bit-packed)
- mode=1 (EXI_MODE_CANONICAL): Canonical EXI for Plug & Charge (0x80 header,
  bit-packed, type-aware encoding per W3C Canonical EXI specification)

Key changes:
- Runtime canonical_mode field in exi_bitstream_t struct, set by caller
- W3C-compliant canonical attribute ordering (xsi:type, xsi:nil, then
  lexicographic by local_name + namespace_uri)
- Recursive schema import processing for complete fragment grammar discovery
- String table partition prefix encoding for canonical string values
- base64Binary decoder fix for correct nesting depth
- New error codes for canonical decoding validation
- exi_types_encoder.h included in all encoder configurations

The feature is disabled by default (canonical_exi_enabled = 0). Set to 1
in config.py to generate canonical EXI code paths.

Validated against EXIficient reference implementation (8/8 fragment digest
match, RSA-SHA512 + ECDSA-SHA256 signature verification) and Keysight
SL1550A EVCC live hardware (byte-identical 2,247-byte canonical output).

Signed-off-by: jharper <jharper@anl.gov>
diff --git a/src/cbexigen/FileGenerator.py b/src/cbexigen/FileGenerator.py
@@ -113,7 +113,8 @@ def __generate_static_h(self, parameters):
             generator = tools_generator.get_generator()
             temp = generator.get_template(config['template'])
             code = temp.render(filename=config['filename'], filekey=config['identifier'],
-                               add_debug_code=self.__analyzer_data.add_debug_code_enabled)
+                               add_debug_code=self.__analyzer_data.add_debug_code_enabled,
+                               canonical_exi_enabled=tools_conf.CONFIG_PARAMS['canonical_exi_enabled'])
 
             tools.save_code_to_file(config['filename'], code, parameters['folder'])
         except KeyError as err:
@@ -131,7 +132,8 @@ def __generate_static_c(self, parameters):
             generator = tools_generator.get_generator()
             temp = generator.get_template(config['template'])
             code = temp.render(filename=config['filename'], filekey=config['identifier'],
-                               add_debug_code=self.__analyzer_data.add_debug_code_enabled)
+                               add_debug_code=self.__analyzer_data.add_debug_code_enabled,
+                               canonical_exi_enabled=tools_conf.CONFIG_PARAMS['canonical_exi_enabled'])
 
             tools.save_code_to_file(config['filename'], code, parameters['folder'])
         except KeyError as err:
diff --git a/src/cbexigen/SchemaAnalyzer.py b/src/cbexigen/SchemaAnalyzer.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # Copyright (c) 2022 - 2023 chargebyte GmbH
 # Copyright (c) 2022 - 2023 Contributors to EVerest
+import sys
 from typing import Union
 
 from xmlschema import XMLSchema11, XsdElement, XsdType, XsdAttribute
@@ -15,6 +16,22 @@
 from cbexigen.tools_config import CONFIG_PARAMS, get_config_module
 
 
+def _canonical_attribute_sort_key(particle):
+    """Sort key for canonical EXI attribute ordering per W3C Canonical EXI spec.
+
+    Order: xsi:type first, xsi:nil second, then lexicographic by (local_name, namespace_uri).
+    Uses Unicode code point comparison (Python's default string comparison).
+    """
+    XSI_NAMESPACE = 'http://www.w3.org/2001/XMLSchema-instance'
+
+    if particle.namespace_uri == XSI_NAMESPACE and particle.name == 'type':
+        return (0, '', '')  # sort first
+    elif particle.namespace_uri == XSI_NAMESPACE and particle.name == 'nil':
+        return (1, '', '')  # sort second
+    else:
+        return (2, particle.name, particle.namespace_uri)  # lexicographic by (local_name, ns_uri)
+
+
 class SchemaAnalyzer(object):
 
     def __init__(self, schema, schema_base, analyzer_data: AnalyzerData, schema_prefix):
@@ -336,6 +353,10 @@ def __get_particle_from_attribute(self, attribute: XsdAttribute):
 
         particle.is_attribute = True
 
+        # Store namespace URI for canonical EXI attribute ordering.
+        # XsdAttribute inherits target_namespace from XsdComponent.
+        particle.namespace_uri = attribute.target_namespace or ''
+
         if attribute.use.casefold() == 'required':
             particle.min_occurs = 1
             particle.max_occurs = 1
@@ -788,7 +809,10 @@ def __get_element_data(self, element: XsdElement, level, count, subst_list):
                     temp_list.append(self.__get_particle_from_attribute(attribute))
 
                 if len(temp_list) > 1:
-                    temp_list.sort(key=lambda item: item.name, reverse=False)
+                    if CONFIG_PARAMS.get('canonical_exi_enabled', 0) == 1:
+                        temp_list.sort(key=_canonical_attribute_sort_key, reverse=False)
+                    else:
+                        temp_list.sort(key=lambda item: item.name, reverse=False)
                 element_data.particles.extend(temp_list)
 
                 if element.type.content_type_label == 'simple':
@@ -1080,25 +1104,51 @@ def __print_child_recursive(element_list, child_element: XsdElement):
 
         # There are unused elements in the ISO-20 schema that are not yet included in the list of all elements
         # for the fragment decoder and encoder. These elements can be determined via the components.
-        # Therefore, we iterate through the components of the schema and the 1st level of imports and complete the list.
-        # TODO: As only ISO-20 is currently affected and the only import of the individual schemas is the
-        #       CommonTypes schema, recursive processing is not used here. This should be changed if necessary.
+        # Therefore, we iterate through the components of the schema and ALL transitive imports recursively.
+        # This matches EXIficient's fragment grammar construction which includes all imported schema elements.
         for component in self.__current_schema.iter_components():
             if isinstance(component, Xsd11Element):
                 if component.name not in fragments.keys():
                     fragments[component.name] = __get_fragment(component)
 
-        for import_item in self.__current_schema.imports.values():
-            imported_schema = XMLSchema11(import_item.name, base_url=self.__schema_base, build=True)
-            for component in imported_schema.iter_components():
-                if isinstance(component, Xsd11Element):
-                    if component.name not in fragments.keys():
-                        fragments[component.name] = __get_fragment(component)
+        # Recursive helper to process all transitive imports
+        def process_imports(schema, processed_schemas=None):
+            if processed_schemas is None:
+                processed_schemas = set()
+
+            for import_item in schema.imports.values():
+                # Avoid infinite loops in case of circular imports
+                import_path = import_item.name
+                if import_path in processed_schemas:
+                    continue
+                processed_schemas.add(import_path)
+
+                imported_schema = XMLSchema11(import_path, base_url=self.__schema_base, build=True)
+
+                # Add elements from this imported schema
+                for component in imported_schema.iter_components():
+                    if isinstance(component, Xsd11Element):
+                        if component.name not in fragments.keys():
+                            fragments[component.name] = __get_fragment(component)
+
+                # Recursively process this schema's imports
+                process_imports(imported_schema, processed_schemas)
+
+        # Process all imports recursively (levels 1, 2, 3, ...)
+        process_imports(self.__current_schema)
 
         # Sort the list of elements and types by 1. name and 2. namespace
         sorted_by_name = dict(sorted(fragments.items(), key=lambda item: (item[1].name, item[1].namespace)))
         self.__known_fragments.update(sorted_by_name)
 
+        # Debug logging for fragment ordering (gated by environment variable)
+        import os
+        if os.environ.get('CBEXIGEN_DEBUG_FRAGMENTS') == '1':
+            print("\n=== cbexigen Fragment Grammar Debug ===", file=sys.stderr)
+            for index, (qname, fragment) in enumerate(sorted_by_name.items()):
+                print(f"FRAGMENT[{index}]: {fragment.name} | {fragment.namespace}", file=sys.stderr)
+            print(f"=== Total: {len(sorted_by_name)} fragments ===\n", file=sys.stderr)
+
     def __build_namespace_element_lists(self):
         """
             This function builds the lists needed to generate the root struct and root decoding function.
diff --git a/src/cbexigen/base_coder_classes.py b/src/cbexigen/base_coder_classes.py
@@ -425,6 +425,10 @@ def append_to_element_grammars(self, grammar: ElementGrammar, element_typename):
 
     def generate_element_grammars(self, element: ElementData):
         self.reset_element_grammars()
+        # NOTE: Attribute particles are pre-sorted by SchemaAnalyzer.
+        # When canonical_exi_enabled=1, attributes are sorted by (local_name, namespace_uri)
+        # with xsi:type first and xsi:nil second, per W3C Canonical EXI spec.
+        # Child element particles remain in schema-defined order.
         particle_is_part_of_sequence = False
 
         # if the current element type is in the namespace elements dict,
diff --git a/src/cbexigen/elementData.py b/src/cbexigen/elementData.py
@@ -56,6 +56,8 @@ class Particle:
     integer_is_unsigned: bool = False
     # additional info for the anyType particle
     process_content: str = None
+    # namespace URI for canonical EXI attribute ordering
+    namespace_uri: str = ''
 
     @property
     def max_occurs_old(self):
diff --git a/src/cbexigen/tools_config.py b/src/cbexigen/tools_config.py
@@ -22,6 +22,8 @@
 CONFIG_PARAMS: Dict[str, Union[str, int]] = {
     # add debug code while generating code
     'add_debug_code': 0,
+    # enable canonical EXI code generation
+    'canonical_exi_enabled': 0,
     # generate analysis tree while generating code
     'generate_analysis_tree': 0,
     'generate_analysis_tree_20': 0,
@@ -113,6 +115,11 @@ def process_config_parameters():
     if hasattr(config_module, 'add_debug_code'):
         CONFIG_PARAMS['add_debug_code'] = config_module.add_debug_code
 
+    ''' canonical EXI definitions '''
+    # canonical_exi_enabled
+    if hasattr(config_module, 'canonical_exi_enabled'):
+        CONFIG_PARAMS['canonical_exi_enabled'] = config_module.canonical_exi_enabled
+
     ''' analysis tree definitions '''
     # generate_analysis_tree
     if hasattr(config_module, 'generate_analysis_tree'):
diff --git a/src/config.py b/src/config.py
@@ -20,6 +20,13 @@
 # and create separate code for the debugging functions
 add_debug_code = 0
 
+# enable canonical EXI code generation
+# this will add a canonical_mode field to exi_bitstream_t
+# and enable Canonical EXI for Plug & Charge code paths in generated output
+# mode=0 (EXI_MODE_STANDARD): schema-informed standard EXI (0x80 header, bit-packed)
+# mode=1 (EXI_MODE_CANONICAL): Canonical EXI for Plug & Charge (0x80 header, bit-packed, type-aware)
+canonical_exi_enabled = 0
+
 # generate analysis tree while generating code
 # this will generate an analysis tree file starting from the root element
 # for the 15118-20 every message has its separate tree file
@@ -438,7 +445,8 @@
             'filename': 'iso2_msgDefEncoder.c',
             'identifier': 'ISO2_MSG_DEF_ENCODER_C',
             'include_std_lib': ['stdint.h'],
-            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_error_codes.h', 'exi_header.h',
+            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_types_encoder.h',
+                              'exi_error_codes.h', 'exi_header.h',
                               'iso2_msgDefDatatypes.h', 'iso2_msgDefEncoder.h']
         }
     },
@@ -495,7 +503,8 @@
             'filename': 'iso20_CommonMessages_Encoder.c',
             'identifier': 'ISO20_COMMON_MESSAGES_ENCODER_C',
             'include_std_lib': ['stdint.h'],
-            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_error_codes.h', 'exi_header.h',
+            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_types_encoder.h',
+                              'exi_error_codes.h', 'exi_header.h',
                               'iso20_CommonMessages_Datatypes.h', 'iso20_CommonMessages_Encoder.h']
         }
     },
@@ -551,7 +560,8 @@
             'filename': 'iso20_AC_Encoder.c',
             'identifier': 'ISO20_AC_ENCODER_C',
             'include_std_lib': ['stdint.h'],
-            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_error_codes.h', 'exi_header.h',
+            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_types_encoder.h',
+                              'exi_error_codes.h', 'exi_header.h',
                               'iso20_AC_Datatypes.h', 'iso20_AC_Encoder.h']
         }
     },
@@ -608,7 +618,8 @@
             'filename': 'iso20_DC_Encoder.c',
             'identifier': 'ISO20_DC_ENCODER_C',
             'include_std_lib': ['stdint.h'],
-            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_error_codes.h', 'exi_header.h',
+            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_types_encoder.h',
+                              'exi_error_codes.h', 'exi_header.h',
                               'iso20_DC_Datatypes.h', 'iso20_DC_Encoder.h']
         }
     },
@@ -665,7 +676,8 @@
             'filename': 'iso20_WPT_Encoder.c',
             'identifier': 'ISO20_WPT_ENCODER_C',
             'include_std_lib': ['stdint.h'],
-            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_error_codes.h', 'exi_header.h',
+            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_types_encoder.h',
+                              'exi_error_codes.h', 'exi_header.h',
                               'iso20_WPT_Datatypes.h', 'iso20_WPT_Encoder.h']
         }
     },
@@ -722,7 +734,8 @@
             'filename': 'iso20_ACDP_Encoder.c',
             'identifier': 'ISO20_ACDP_ENCODER_C',
             'include_std_lib': ['stdint.h'],
-            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_error_codes.h', 'exi_header.h',
+            'include_other': ['exi_basetypes.h', 'exi_basetypes_encoder.h', 'exi_types_encoder.h',
+                              'exi_error_codes.h', 'exi_header.h',
                               'iso20_ACDP_Datatypes.h', 'iso20_ACDP_Encoder.h']
         }
     },
diff --git a/src/input/code_templates/c/decoder/DecodeTypeBase64BinarySimple.jinja b/src/input/code_templates/c/decoder/DecodeTypeBase64BinarySimple.jinja
@@ -1,13 +1,13 @@
 {{ indent * level }}{{ decode_comment }}
-{{ indent * level }}error = exi_basetypes_decoder_uint_16(stream, &{{ type_content_len }});
-{{ indent * level }}if (error == 0)
-{{ indent * level }}{
-{{ indent * (level + 1) }}error = exi_basetypes_decoder_bytes(stream, {{ type_content_len }}, &{{ type_content }}[0], {{ type_define }});
+{{ indent * (level + 1) }}error = exi_basetypes_decoder_uint_16(stream, &{{ type_content_len }});
 {{ indent * (level + 1) }}if (error == 0)
 {{ indent * (level + 1) }}{
+{{ indent * (level + 2) }}error = exi_basetypes_decoder_bytes(stream, {{ type_content_len }}, &{{ type_content }}[0], {{ type_define }});
+{{ indent * (level + 2) }}if (error == 0)
+{{ indent * (level + 2) }}{
 {%- if type_option == 1 %}
-{{ indent * (level + 2) }}{{ type_value }}_isUsed = 1u;
+{{ indent * (level + 3) }}{{ type_value }}_isUsed = 1u;
 {%- endif %}
-{{ indent * (level + 2) }}grammar_id = {{ next_grammar_id }};
+{{ indent * (level + 3) }}grammar_id = {{ next_grammar_id }};
+{{ indent * (level + 2) }}}
 {{ indent * (level + 1) }}}
-{{ indent * level }}}
diff --git a/src/input/code_templates/c/static_code/exi_basetypes_encoder.c.jinja b/src/input/code_templates/c/static_code/exi_basetypes_encoder.c.jinja
@@ -48,12 +48,8 @@ int exi_basetypes_encoder_bool(exi_bitstream_t* stream, int value)
         stream->status_callback(EXI_DEBUG__BASETYPES_ENCODE_BOOL, 0, (int)value, 0);
     }
 {% endif %}
-    int error;
     uint32_t bit = (value) ? 1 : 0;
-
-    error = exi_bitstream_write_bits(stream, 1, bit);
-
-    return error;
+    return exi_bitstream_write_bits(stream, 1, bit);
 }
 
 /*****************************************************************************
@@ -304,6 +300,18 @@ int exi_basetypes_encoder_signed(exi_bitstream_t* stream, const exi_signed_t* va
     return exi_basetypes_encoder_unsigned(stream, &value->data);
 }
 
+/*****************************************************************************
+ * interface functions - string length encoding
+ *****************************************************************************/
+{%- if canonical_exi_enabled == 1 %}
+int exi_basetypes_encoder_string_len(exi_bitstream_t* stream, uint16_t characters_len)
+{
+    /* Standard EXI string table miss encoding: length + 2 offset.
+     * Used by both EXI_MODE_STANDARD and EXI_MODE_CANONICAL (Plug & Charge profile). */
+    return exi_basetypes_encoder_uint_16(stream, (uint16_t)(characters_len + 2));
+}
+{%- endif %}
+
 /*****************************************************************************
  * interface functions - characters, string
  *****************************************************************************/
diff --git a/src/input/code_templates/c/static_code/exi_basetypes_encoder.h.jinja b/src/input/code_templates/c/static_code/exi_basetypes_encoder.h.jinja
@@ -95,4 +95,18 @@ int exi_basetypes_encoder_signed(exi_bitstream_t* stream, const exi_signed_t* va
  *
  */
 int exi_basetypes_encoder_characters(exi_bitstream_t* stream, size_t characters_len, const exi_character_t* characters, size_t characters_size);
+
+/**
+ * \brief       encoder for the length prefix of a string value
+ *
+ *              Writes characters_len + 2 (EXI string table miss encoding). The
+ *              encoding is identical for EXI_MODE_STANDARD and EXI_MODE_CANONICAL.
+ *              The definition is only generated when canonical_exi_enabled == 1.
+ *
+ * \param       stream          EXI bitstream
+ * \param       characters_len  length of the string
+ * \return                      NO_ERROR or error code
+ *
+ */
+int exi_basetypes_encoder_string_len(exi_bitstream_t* stream, uint16_t characters_len);
 {% endblock %}
diff --git a/src/input/code_templates/c/static_code/exi_bitstream.c.jinja b/src/input/code_templates/c/static_code/exi_bitstream.c.jinja
@@ -106,6 +106,9 @@ void exi_bitstream_init(exi_bitstream_t* stream, uint8_t* data, size_t data_size
     stream->_flag_byte_pos = data_offset;
 
     stream->status_callback = status_callback;
+{%- if canonical_exi_enabled == 1 %}
+    stream->canonical_mode = 0;
+{%- endif %}
 {%- if add_debug_code == 1 %}
 
     if (stream->status_callback)
diff --git a/src/input/code_templates/c/static_code/exi_bitstream.h.jinja b/src/input/code_templates/c/static_code/exi_bitstream.h.jinja
@@ -67,6 +67,16 @@ typedef struct exi_bitstream {
 
     /* Pointer to callback for reporting errors or logging if assigned */
     exi_status_callback status_callback;
+{%- if canonical_exi_enabled == 1 %}
+/* EXI encoding mode constants for canonical_mode field */
+#define EXI_MODE_STANDARD  0u   /* Schema-informed standard EXI (0x80 header, bit-packed) */
+#define EXI_MODE_CANONICAL 1u   /* Canonical EXI for Plug & Charge (0x80 header, bit-packed, type-aware) */
+
+    /* EXI encoding mode: EXI_MODE_STANDARD (0) or EXI_MODE_CANONICAL (1).
+     * Canonical EXI for Plug & Charge: 0x80 header, bit-packed, type-aware.
+     * Both modes use the simple 0x80 header; mode is set by the caller (encoder-side only). */
+    uint8_t canonical_mode;
+{%- endif %}
 } exi_bitstream_t;
 
 
diff --git a/src/input/code_templates/c/static_code/exi_error_codes.h.jinja b/src/input/code_templates/c/static_code/exi_error_codes.h.jinja
@@ -50,6 +50,10 @@
 #define EXI_ERROR__UNSUPPORTED_DATETIME_TYPE -211
 #define EXI_ERROR__UNSUPPORTED_CHARACTER_VALUE -212
 
+/* canonical decoding errors */
+#define EXI_ERROR__INVALID_LEXICAL_VALUE -220
+#define EXI_ERROR__NUMERIC_OVERFLOW -221
+
 //      fragment errors -230 to -259
 #define EXI_ERROR__INCORRECT_END_FRAGMENT_VALUE -230
 
diff --git a/src/input/code_templates/c/static_code/exi_header.c.jinja b/src/input/code_templates/c/static_code/exi_header.c.jinja
@@ -9,6 +9,9 @@
 {% block content %}
 int exi_header_write(exi_bitstream_t* stream)
 {
+    /* Both EXI_MODE_STANDARD (0) and EXI_MODE_CANONICAL (1) use the simple 0x80 header.
+     * Canonical EXI for Plug & Charge: 0x80 header, bit-packed, type-aware.
+     * The canonical_mode field is set by the caller (encoder-side only), not by header detection. */
     return exi_bitstream_write_bits(stream, EXI_SIMPLE_HEADER_BIT_SIZE, EXI_SIMPLE_HEADER_VALUE);
 }
 
@@ -35,6 +38,7 @@ int exi_header_read_and_check(exi_bitstream_t* stream)
     // - one Presence Bit for EXI Options "absence of options" 0b0
     // - EXI format version "Final version 1" 0b00000
     // results in eight header bits 0b10000000 = 0x80
+    // Both EXI_MODE_STANDARD and EXI_MODE_CANONICAL use the 0x80 header.
     if (header != EXI_SIMPLE_HEADER_VALUE) {
         result = EXI_ERROR__HEADER_INCORRECT;
     }
diff --git a/src/input/code_templates/c/static_code/exi_header.h.jinja b/src/input/code_templates/c/static_code/exi_header.h.jinja
@@ -9,6 +9,7 @@
 #define EXI_SIMPLE_HEADER_VALUE 0x80
 
 
+
 /**
  * \brief       Writes a simple EXI header (0x80)
  *
diff --git a/src/input/code_templates/c/static_code/exi_types_decoder.c.jinja b/src/input/code_templates/c/static_code/exi_types_decoder.c.jinja
diff --git a/src/input/code_templates/c/static_code/exi_types_decoder.h.jinja b/src/input/code_templates/c/static_code/exi_types_decoder.h.jinja