-
Notifications
You must be signed in to change notification settings - Fork 72
Expand file tree
/
Copy pathSerializationUtil.java
More file actions
280 lines (253 loc) · 10.2 KB
/
SerializationUtil.java
File metadata and controls
280 lines (253 loc) · 10.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
package net.agkn.hll.serialization;
/*
* Copyright 2013 Aggregate Knowledge, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import net.agkn.hll.HLLType;
/**
 * A collection of constants and utilities for serializing and deserializing
 * HLLs.
 *
 * <p>NOTE: 'package' visibility is used for the bit-layout constants that
 * only need to be used by the {@link ISchemaVersion} implementations. The
 * structure of a serialized HLL's metadata should be opaque to the rest of
 * the library.</p>
 *
 * <p>All <code>pack*</code> methods silently truncate their arguments to the
 * number of bits available for the field (by masking); they do not validate
 * ranges. Callers are expected to pass in-range values.</p>
 *
 * @author timon
 */
public class SerializationUtil {
    /**
     * The number of bits (of the parameters byte) dedicated to encoding the
     * width of the registers.
     */
    /*package*/ static final int REGISTER_WIDTH_BITS = 3;

    /**
     * A mask to cap the maximum value of the register width.
     */
    /*package*/ static final int REGISTER_WIDTH_MASK = (1 << REGISTER_WIDTH_BITS) - 1;

    /**
     * The number of bits (of the parameters byte) dedicated to encoding
     * <code>log2(registerCount)</code>.
     */
    /*package*/ static final int LOG2_REGISTER_COUNT_BITS = 5;

    /**
     * A mask to cap the maximum value of <code>log2(registerCount)</code>.
     */
    /*package*/ static final int LOG2_REGISTER_COUNT_MASK = (1 << LOG2_REGISTER_COUNT_BITS) - 1;

    /**
     * The number of bits (of the cutoff byte) dedicated to encoding the
     * log-base-2 of the explicit cutoff or sentinel values for
     * 'explicit-disabled' or 'auto'.
     */
    /*package*/ static final int EXPLICIT_CUTOFF_BITS = 6;

    /**
     * A mask to cap the maximum value of the explicit cutoff choice.
     */
    /*package*/ static final int EXPLICIT_CUTOFF_MASK = (1 << EXPLICIT_CUTOFF_BITS) - 1;

    /**
     * Number of bits in a nibble.
     */
    private static final int NIBBLE_BITS = 4;

    /**
     * A mask to cap the maximum value of a nibble.
     */
    private static final int NIBBLE_MASK = (1 << NIBBLE_BITS) - 1;

    // ************************************************************************
    // Serialization utilities

    /**
     * Schema version one (v1).
     */
    // NOTE: the public fields below are intentionally left non-final so that
    //       existing code which assigns to them keeps compiling.
    public static ISchemaVersion VERSION_ONE = new SchemaVersionOne();

    /**
     * The default schema version for serializing HLLs.
     */
    public static ISchemaVersion DEFAULT_SCHEMA_VERSION = VERSION_ONE;

    /**
     * List of registered schema versions, indexed by their version numbers. If
     * an entry is <code>null</code>, then no such schema version is registered.
     * Similarly, registering a new schema version simply entails assigning an
     * {@link ISchemaVersion} instance to the appropriate index of this array.
     *
     * <p>By default, only {@link SchemaVersionOne} is registered. Note that
     * version zero will always be reserved for internal (e.g. proprietary,
     * legacy) schema specifications/implementations and will never be
     * assigned by this library.</p>
     *
     * <p>The array has 16 slots because the schema version is stored in a
     * single nibble of the version byte (see
     * {@link #packVersionByte(int, int)}).</p>
     */
    public static ISchemaVersion[] REGISTERED_SCHEMA_VERSIONS = new ISchemaVersion[16];
    static {
        REGISTERED_SCHEMA_VERSIONS[1] = VERSION_ONE;
    }

    /**
     * Looks up a registered schema version by its version number.
     *
     * @param  schemaVersionNumber the version number of the {@link ISchemaVersion}
     *         desired. This must be a registered schema version number.
     * @return The {@link ISchemaVersion} for the given number. This will never
     *         be <code>null</code>.
     * @throws IllegalArgumentException if the number is outside the bounds of
     *         {@link #REGISTERED_SCHEMA_VERSIONS} or if no schema version is
     *         registered under it.
     */
    public static ISchemaVersion getSchemaVersion(final int schemaVersionNumber) {
        if(schemaVersionNumber < 0 || schemaVersionNumber >= REGISTERED_SCHEMA_VERSIONS.length) {
            throw new IllegalArgumentException("Invalid schema version number " + schemaVersionNumber);
        }

        final ISchemaVersion schemaVersion = REGISTERED_SCHEMA_VERSIONS[schemaVersionNumber];
        if(schemaVersion == null) {
            throw new IllegalArgumentException("Unknown schema version number " + schemaVersionNumber);
        }
        return schemaVersion;
    }

    /**
     * Get the appropriate {@link ISchemaVersion schema version} for the specified
     * serialized HLL. Only the first byte (the version byte) is inspected.
     *
     * @param  bytes the serialized HLL whose schema version is desired. Must
     *         contain at least one byte.
     * @return the schema version for the specified HLL. This will never
     *         be <code>null</code>.
     * @throws NullPointerException if <code>bytes</code> is <code>null</code>.
     * @throws IllegalArgumentException if <code>bytes</code> is empty, or if
     *         the encoded schema version number is not registered.
     */
    public static ISchemaVersion getSchemaVersion(final byte[] bytes) {
        if(bytes == null) {
            throw new NullPointerException("Serialized HLL bytes must not be null");
        }
        if(bytes.length == 0) {
            // Fail with a descriptive message instead of a bare
            // ArrayIndexOutOfBoundsException from bytes[0].
            throw new IllegalArgumentException("Serialized HLL bytes must not be empty");
        }

        final byte versionByte = bytes[0];
        final int schemaVersionNumber = schemaVersion(versionByte);
        return getSchemaVersion(schemaVersionNumber);
    }

    // ************************************************************************
    // Package-specific shared helpers

    /**
     * Generates a byte that encodes the schema version and the type ordinal
     * of the HLL.
     *
     * <p>The top nibble is the schema version and the bottom nibble is the
     * type ordinal. Each argument is masked to its low 4 bits.</p>
     *
     * @param  schemaVersion the schema version to encode.
     * @param  typeOrdinal the type ordinal of the HLL to encode.
     * @return the packed version byte
     */
    public static byte packVersionByte(final int schemaVersion, final int typeOrdinal) {
        return (byte)(((NIBBLE_MASK & schemaVersion) << NIBBLE_BITS) | (NIBBLE_MASK & typeOrdinal));
    }

    /**
     * Generates a byte that encodes the log-base-2 of the explicit cutoff
     * or sentinel values for 'explicit-disabled' or 'auto', as well as the
     * boolean indicating whether to use {@link HLLType#SPARSE}
     * in the promotion hierarchy.
     *
     * <p>The top bit is always padding, the second highest bit indicates the
     * 'sparse-enabled' boolean, and the lowest six bits encode the explicit
     * cutoff value.</p>
     *
     * @param  explicitCutoff the explicit cutoff value to encode. Only the low
     *         six bits are kept.
     *         <ul>
     *           <li>
     *             If 'explicit-disabled' is chosen, this value should be <code>0</code>.
     *           </li>
     *           <li>
     *             If 'auto' is chosen, this value should be <code>63</code>.
     *           </li>
     *           <li>
     *             If a cutoff of 2<sup>n</sup> is desired, for {@code 0 <= n < 31},
     *             this value should be <code>n + 1</code>.
     *           </li>
     *         </ul>
     * @param  sparseEnabled whether {@link HLLType#SPARSE}
     *         should be used in the promotion hierarchy to improve HLL
     *         storage.
     *
     * @return the packed cutoff byte
     */
    public static byte packCutoffByte(final int explicitCutoff, final boolean sparseEnabled) {
        final int sparseBit = (sparseEnabled ? (1 << EXPLICIT_CUTOFF_BITS) : 0);
        return (byte)(sparseBit | (EXPLICIT_CUTOFF_MASK & explicitCutoff));
    }

    /**
     * Generates a byte that encodes the parameters of a
     * {@link HLLType#FULL} or {@link HLLType#SPARSE}
     * HLL.
     *
     * <p>The top 3 bits are used to encode <code>registerWidth - 1</code>
     * (range of <code>registerWidth</code> is thus 1-8) and the bottom 5
     * bits are used to encode <code>registerCountLog2</code>
     * (range of <code>registerCountLog2</code> is thus 0-31).</p>
     *
     * <p>Out-of-range arguments are not rejected; they are truncated by
     * masking. For example a <code>registerWidth</code> of 9 wraps around and
     * decodes as 1.</p>
     *
     * @param  registerWidth the register width (must be at least 1 and at
     *         most 8)
     * @param  registerCountLog2 the log-base-2 of the register count (must
     *         be at least 0 and at most 31)
     * @return the packed parameters byte
     */
    public static byte packParametersByte(final int registerWidth, final int registerCountLog2) {
        final int widthBits = ((registerWidth - 1) & REGISTER_WIDTH_MASK);
        final int countBits = (registerCountLog2 & LOG2_REGISTER_COUNT_MASK);
        return (byte)((widthBits << LOG2_REGISTER_COUNT_BITS) | countBits);
    }

    /**
     * Extracts the 'sparse-enabled' boolean from the cutoff byte of a serialized
     * HLL.
     *
     * @param  cutoffByte the cutoff byte of the serialized HLL
     * @return the 'sparse-enabled' boolean
     *
     * @see #packCutoffByte(int, boolean)
     */
    public static boolean sparseEnabled(final byte cutoffByte) {
        // The byte is sign-extended to int before the shift; the trailing
        // mask discards any extended sign bits.
        return ((cutoffByte >>> EXPLICIT_CUTOFF_BITS) & 1) == 1;
    }

    /**
     * Extracts the explicit cutoff value from the cutoff byte of a serialized
     * HLL.
     *
     * @param  cutoffByte the cutoff byte of the serialized HLL
     * @return the explicit cutoff value, exactly as it was passed to
     *         {@link #packCutoffByte(int, boolean)} (after masking to six bits)
     */
    public static int explicitCutoff(final byte cutoffByte) {
        return (cutoffByte & EXPLICIT_CUTOFF_MASK);
    }

    /**
     * Extracts the schema version from the version byte of a serialized
     * HLL.
     *
     * @param  versionByte the version byte of the serialized HLL
     * @return the schema version of the serialized HLL, in the range 0-15
     *
     * @see #packVersionByte(int, int)
     */
    public static int schemaVersion(final byte versionByte) {
        // The byte is sign-extended to int before the shift; masking with the
        // nibble mask afterwards keeps only the original top nibble.
        return NIBBLE_MASK & (versionByte >>> NIBBLE_BITS);
    }

    /**
     * Extracts the type ordinal from the version byte of a serialized HLL.
     *
     * @param  versionByte the version byte of the serialized HLL
     * @return the type ordinal of the serialized HLL, in the range 0-15
     *
     * @see #packVersionByte(int, int)
     */
    public static int typeOrdinal(final byte versionByte) {
        return (versionByte & NIBBLE_MASK);
    }

    /**
     * Extracts the register width from the parameters byte of a serialized
     * {@link HLLType#FULL} HLL.
     *
     * @param  parametersByte the parameters byte of the serialized HLL
     * @return the register width of the serialized HLL, in the range 1-8
     *
     * @see #packParametersByte(int, int)
     */
    public static int registerWidth(final byte parametersByte) {
        // Sign extension from the byte-to-int promotion is removed by the mask;
        // the "+ 1" undoes the "registerWidth - 1" applied when packing.
        return ((parametersByte >>> LOG2_REGISTER_COUNT_BITS) & REGISTER_WIDTH_MASK) + 1;
    }

    /**
     * Extracts the log2(registerCount) from the parameters byte of a
     * serialized {@link HLLType#FULL} HLL.
     *
     * @param  parametersByte the parameters byte of the serialized HLL
     * @return log2(registerCount) of the serialized HLL, in the range 0-31
     *
     * @see #packParametersByte(int, int)
     */
    public static int registerCountLog2(final byte parametersByte) {
        return (parametersByte & LOG2_REGISTER_COUNT_MASK);
    }
}