|
1 | 1 | # SPDX-License-Identifier: Apache-2.0 |
2 | 2 | # Copyright (c) 2022 - 2023 chargebyte GmbH |
3 | 3 | # Copyright (c) 2022 - 2023 Contributors to EVerest |
| 4 | +import sys |
4 | 5 | from typing import Union |
5 | 6 |
|
6 | 7 | from xmlschema import XMLSchema11, XsdElement, XsdType, XsdAttribute |
|
15 | 16 | from cbexigen.tools_config import CONFIG_PARAMS, get_config_module |
16 | 17 |
|
17 | 18 |
|
| 19 | +def _canonical_attribute_sort_key(particle): |
| 20 | + """Sort key for canonical EXI attribute ordering per W3C Canonical EXI spec. |
| 21 | +
|
| 22 | + Order: xsi:type first, xsi:nil second, then lexicographic by (local_name, namespace_uri). |
| 23 | + Uses Unicode code point comparison (Python's default string comparison). |
| 24 | + """ |
| 25 | + XSI_NAMESPACE = 'http://www.w3.org/2001/XMLSchema-instance' |
| 26 | + |
| 27 | + if particle.namespace_uri == XSI_NAMESPACE and particle.name == 'type': |
| 28 | + return (0, '', '') # sort first |
| 29 | + elif particle.namespace_uri == XSI_NAMESPACE and particle.name == 'nil': |
| 30 | + return (1, '', '') # sort second |
| 31 | + else: |
| 32 | + return (2, particle.name, particle.namespace_uri) # lexicographic by (local_name, ns_uri) |
| 33 | + |
| 34 | + |
18 | 35 | class SchemaAnalyzer(object): |
19 | 36 |
|
20 | 37 | def __init__(self, schema, schema_base, analyzer_data: AnalyzerData, schema_prefix): |
@@ -336,6 +353,10 @@ def __get_particle_from_attribute(self, attribute: XsdAttribute): |
336 | 353 |
|
337 | 354 | particle.is_attribute = True |
338 | 355 |
|
| 356 | + # Store namespace URI for canonical EXI attribute ordering. |
| 357 | + # XsdAttribute inherits target_namespace from XsdComponent. |
| 358 | + particle.namespace_uri = attribute.target_namespace or '' |
| 359 | + |
339 | 360 | if attribute.use.casefold() == 'required': |
340 | 361 | particle.min_occurs = 1 |
341 | 362 | particle.max_occurs = 1 |
@@ -788,7 +809,10 @@ def __get_element_data(self, element: XsdElement, level, count, subst_list): |
788 | 809 | temp_list.append(self.__get_particle_from_attribute(attribute)) |
789 | 810 |
|
790 | 811 | if len(temp_list) > 1: |
791 | | - temp_list.sort(key=lambda item: item.name, reverse=False) |
| 812 | + if CONFIG_PARAMS.get('canonical_exi_enabled', 0) == 1: |
| 813 | + temp_list.sort(key=_canonical_attribute_sort_key, reverse=False) |
| 814 | + else: |
| 815 | + temp_list.sort(key=lambda item: item.name, reverse=False) |
792 | 816 | element_data.particles.extend(temp_list) |
793 | 817 |
|
794 | 818 | if element.type.content_type_label == 'simple': |
@@ -1080,25 +1104,51 @@ def __print_child_recursive(element_list, child_element: XsdElement): |
1080 | 1104 |
|
1081 | 1105 | # There are unused elements in the ISO-20 schema that are not yet included in the list of all elements |
1082 | 1106 | # for the fragment decoder and encoder. These elements can be determined via the components. |
1083 | | - # Therefore, we iterate through the components of the schema and the 1st level of imports and complete the list. |
1084 | | - # TODO: As only ISO-20 is currently affected and the only import of the individual schemas is the |
1085 | | - # CommonTypes schema, recursive processing is not used here. This should be changed if necessary. |
| 1107 | + # Therefore, we iterate through the components of the schema and ALL transitive imports recursively. |
| 1108 | + # This matches EXIficient's fragment grammar construction which includes all imported schema elements. |
1086 | 1109 | for component in self.__current_schema.iter_components(): |
1087 | 1110 | if isinstance(component, Xsd11Element): |
1088 | 1111 | if component.name not in fragments.keys(): |
1089 | 1112 | fragments[component.name] = __get_fragment(component) |
1090 | 1113 |
|
1091 | | - for import_item in self.__current_schema.imports.values(): |
1092 | | - imported_schema = XMLSchema11(import_item.name, base_url=self.__schema_base, build=True) |
1093 | | - for component in imported_schema.iter_components(): |
1094 | | - if isinstance(component, Xsd11Element): |
1095 | | - if component.name not in fragments.keys(): |
1096 | | - fragments[component.name] = __get_fragment(component) |
| 1114 | + # Recursive helper to process all transitive imports |
| 1115 | + def process_imports(schema, processed_schemas=None): |
| 1116 | + if processed_schemas is None: |
| 1117 | + processed_schemas = set() |
| 1118 | + |
| 1119 | + for import_item in schema.imports.values(): |
| 1120 | + # Avoid infinite loops in case of circular imports |
| 1121 | + import_path = import_item.name |
| 1122 | + if import_path in processed_schemas: |
| 1123 | + continue |
| 1124 | + processed_schemas.add(import_path) |
| 1125 | + |
| 1126 | + imported_schema = XMLSchema11(import_path, base_url=self.__schema_base, build=True) |
| 1127 | + |
| 1128 | + # Add elements from this imported schema |
| 1129 | + for component in imported_schema.iter_components(): |
| 1130 | + if isinstance(component, Xsd11Element): |
| 1131 | + if component.name not in fragments.keys(): |
| 1132 | + fragments[component.name] = __get_fragment(component) |
| 1133 | + |
| 1134 | + # Recursively process this schema's imports |
| 1135 | + process_imports(imported_schema, processed_schemas) |
| 1136 | + |
| 1137 | + # Process all imports recursively (levels 1, 2, 3, ...) |
| 1138 | + process_imports(self.__current_schema) |
1097 | 1139 |
|
1098 | 1140 | # Sort the list of elements and types by 1. name and 2. namespace |
1099 | 1141 | sorted_by_name = dict(sorted(fragments.items(), key=lambda item: (item[1].name, item[1].namespace))) |
1100 | 1142 | self.__known_fragments.update(sorted_by_name) |
1101 | 1143 |
|
| 1144 | + # Debug logging for fragment ordering (gated by environment variable) |
| 1145 | + import os |
| 1146 | + if os.environ.get('CBEXIGEN_DEBUG_FRAGMENTS') == '1': |
| 1147 | + print("\n=== cbexigen Fragment Grammar Debug ===", file=sys.stderr) |
| 1148 | + for index, (qname, fragment) in enumerate(sorted_by_name.items()): |
| 1149 | + print(f"FRAGMENT[{index}]: {fragment.name} | {fragment.namespace}", file=sys.stderr) |
| 1150 | + print(f"=== Total: {len(sorted_by_name)} fragments ===\n", file=sys.stderr) |
| 1151 | + |
1102 | 1152 | def __build_namespace_element_lists(self): |
1103 | 1153 | """ |
1104 | 1154 | This function builds the lists needed to generate the root struct and root decoding function. |
|
0 commit comments