66import pyarrow as pa
77import pyarrow .flight as flight
88import structlog
9- import zstandard as zstd
9+ from pydantic import BaseModel
1010
11- from . import schema_uploader
11+ from . import schema_uploader , server
1212
1313log = structlog .get_logger ()
1414
@@ -35,6 +35,28 @@ class SchemaInfo:
3535 tags : dict [str , Any ]
3636
3737
38+ class AirportSerializedContentsWithSHA256Hash (BaseModel ):  # Pydantic model: a SHA-256 hash plus either a URL to, or an inline copy of, some serialized data.
39+ # SHA-256 hash of the serialized data.
40+ sha256 : str
41+ # URL of the serialized data; upload_and_generate_schema_list passes None here when serialize_inline is set.
42+ url : str | None
43+ # The serialized data itself when inline serialization is used; otherwise None.
44+ serialized : str | None  # NOTE(review): the visible caller passes `compressed_data` here -- confirm it is a str and not bytes.
45+
46+
47+ class AirportSerializedSchema (BaseModel ):  # Pydantic model for one element of AirportSerializedCatalogRoot.schemas.
48+ name : str  # Schema name; upload_and_generate_schema_list fills this from schema_name.
49+ description : str  # Schema description; the visible producer falls back to "" when the schema has no entry in schema_details.
50+ tags : dict [str , str ]  # NOTE(review): SchemaInfo.tags is declared dict[str, Any] -- confirm every tag value reaching this model is a str.
51+ contents : AirportSerializedContentsWithSHA256Hash  # Presumably the hash and URL/inline data for this schema's own serialized contents -- verify against the producer.
52+
53+
54+ class AirportSerializedCatalogRoot (BaseModel ):  # Pydantic model returned by upload_and_generate_schema_list: the top-level description of the catalog.
55+ contents : AirportSerializedContentsWithSHA256Hash  # Hash and URL/inline data for the combined upload covering all schemas.
56+ schemas : list [AirportSerializedSchema ]  # Per-schema entries; the producer passes a list of dicts here.
57+ version_info : server .GetCatalogVersionResult  # Built by the producer from catalog_version and catalog_version_fixed (passed as is_fixed).
58+
59+
3860class FlightSchemaMetadata :
3961 def __init__ (
4062 self ,
@@ -83,7 +105,7 @@ def upload_and_generate_schema_list(
83105 catalog_version_fixed : bool ,
84106 enable_sha256_caching : bool = True ,
85107 serialize_inline : bool = False ,
86- ) -> bytes :
108+ ) -> AirportSerializedCatalogRoot :
87109 serialized_schema_data : list [dict [str , Any ]] = []
88110 s3_client = boto3 .client ("s3" )
89111 all_schema_flights_serialized : list [Any ] = []
@@ -128,7 +150,7 @@ def upload_and_generate_schema_list(
128150
129151 serialized_schema_data .append (
130152 {
131- "schema " : schema_name ,
153+ "name " : schema_name ,
132154 "description" : schema_details [schema_name ].description
133155 if schema_name in schema_details
134156 else "" ,
@@ -153,18 +175,14 @@ def upload_and_generate_schema_list(
153175 )
154176 all_schema_path = f"{ SCHEMA_BASE_URL } /{ all_schema_contents_upload .s3_path } "
155177
156- schemas_list_data = {
157- "schemas" : serialized_schema_data ,
158- # This encodes the contents of all schemas in one file.
159- "contents" : {
160- "sha256" : all_schema_contents_upload .sha256_hash ,
161- "url" : all_schema_path if not serialize_inline else None ,
162- "serialized" : all_schema_contents_upload .compressed_data if serialize_inline else None ,
163- },
164- "version_info" : [catalog_version , catalog_version_fixed ],
165- }
166-
167- packed_data = msgpack .packb (schemas_list_data )
168- compressor = zstd .ZstdCompressor (level = SCHEMA_TOP_LEVEL_COMPRESSION_LEVEL )
169- compressed_data = compressor .compress (packed_data )
170- return msgpack .packb ([len (packed_data ), compressed_data ])
178+ return AirportSerializedCatalogRoot (
179+ schemas = serialized_schema_data ,
180+ contents = AirportSerializedContentsWithSHA256Hash (
181+ sha256 = all_schema_contents_upload .sha256_hash ,
182+ url = all_schema_path if not serialize_inline else None ,
183+ serialized = all_schema_contents_upload .compressed_data if serialize_inline else None ,
184+ ),
185+ version_info = server .GetCatalogVersionResult (
186+ catalog_version = catalog_version , is_fixed = catalog_version_fixed
187+ ),
188+ )
0 commit comments