-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathidxcpp.hpp
More file actions
205 lines (162 loc) · 5.04 KB
/
idxcpp.hpp
File metadata and controls
205 lines (162 loc) · 5.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#pragma once
#include <algorithm>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>
// TODO: Add a flag for endianness
// TODO: Data type consistency
// TODO: Columns can be bigger than size_t
// TODO: Add more tests
// TODO: Data endianness
// TODO: Optimize
// TODO: Backward C++ compatibility
// IDXCPP_PATH is the parameter type used for the file path taken by the
// Idx constructor. When IDXCPP_CPP_BACKWARDS_COMPABILITY (sic) is defined
// before this point it expands to `const std::string&`; otherwise it expands
// to `std::filesystem::path`.
#ifdef IDXCPP_CPP_BACKWARDS_COMPABILITY
#define IDXCPP_PATH const std::string&
#else
#define IDXCPP_PATH std::filesystem::path
#endif
namespace {
// From https://stackoverflow.com/a/4956493/15394064
// Returns a copy of `u` whose bytes are in reverse order, i.e. converts a
// value between big-endian and little-endian representation.
//
// The bytes are copied out and back with std::memcpy instead of being read
// through a union: reading a union member other than the one last written is
// undefined behaviour in C++.
template <typename T>
T swap_endian(T u) {
    static_assert(CHAR_BIT == 8, "CHAR_BIT != 8");
    static_assert(std::is_trivially_copyable<T>::value,
                  "swap_endian requires a trivially copyable type");

    unsigned char bytes[sizeof(T)];
    std::memcpy(bytes, &u, sizeof(T));
    std::reverse(bytes, bytes + sizeof(T));
    std::memcpy(&u, bytes, sizeof(T));
    return u;
}
}
namespace Idxcpp {
// Forward declarations
class Idx;
class IdxAccessor;
// Element type codes as stored in the third byte of an IDX file's magic
// number. The trailing comment on each enumerator is the size of one element
// of that type.
enum IdxDataType {
Unsigned_Byte = 0x08, // 1 byte
Byte = 0x09, // 1 byte
Short = 0x0B, // 2 bytes
Int = 0x0C, // 4 bytes
Float = 0x0D, // 4 bytes
Double = 0x0E // 8 bytes
};
// Non-owning view into the data buffer of an Idx object. An accessor is
// obtained by indexing an Idx (or another accessor) and can be indexed again
// to descend one more dimension, or converted to a typed pointer to reach the
// underlying bytes.
class IdxAccessor {
    friend class Idx;

public:
    // idx   : the Idx object whose data is being viewed (not owned)
    // scope : index of the first dimension that is still "below" this view
    // i     : index selected in the dimension just above `scope`
    // ptr   : start of the region being subdivided
    IdxAccessor(Idx* idx, int scope, int i, char* ptr);

    // Copy and move are the compiler-generated member-wise operations.
    IdxAccessor(const IdxAccessor&) = default;
    IdxAccessor(IdxAccessor&&) noexcept = default;
    IdxAccessor& operator=(const IdxAccessor&) = default;
    IdxAccessor& operator=(IdxAccessor&&) noexcept = default;
    ~IdxAccessor() = default;

    // Selects index `i` in the next dimension and returns a narrower view.
    IdxAccessor operator[](int i);

    // Returns the size computed for this view when it was constructed.
    size_t getSize() const noexcept { return size; }

    // Writes the viewed bytes to std::cout (debug helper).
    void printData() const noexcept;

    // Implicit conversions to a typed pointer at the start of the view.
    // The caller is responsible for picking the pointer type that matches the
    // file's data type.
    operator char* () { return ptr; }
    operator unsigned char* () { return reinterpret_cast<unsigned char*>(ptr); }
    operator short* () { return reinterpret_cast<short*>(ptr); }
    operator int* () { return reinterpret_cast<int*>(ptr); }
    operator float* () { return reinterpret_cast<float*>(ptr); }
    operator double* () { return reinterpret_cast<double*>(ptr); }

private:
    Idx* idx;     // viewed object, not owned
    int scope;    // first dimension index covered by this view
    char* ptr;    // start of the viewed region inside idx's data buffer
    size_t size;  // product of the dimension sizes from `scope` onwards
};
// In-memory representation of an IDX file: the element type, the size of each
// dimension and the raw data bytes exactly as they were read from the file.
class Idx {
    friend class IdxAccessor;

public:
    // Loads the IDX file located at `path`.
    explicit Idx(IDXCPP_PATH path);
    ~Idx() = default;

    // Returns an accessor for index `i` of the first dimension.
    IdxAccessor operator[](int i);

    // Returns the number of rows (the size of the first dimension).
    // Returns 0 when no dimensions are stored (for example on a moved-from
    // object) instead of indexing an empty vector.
    std::uint32_t getRows() const noexcept {
        return dimensions.empty() ? std::uint32_t{0} : dimensions[0];
    }

    // Returns the number of columns (the product of all dimensions after the
    // first one).
    size_t getColumns() const noexcept { return columns; }

    // Returns the vector holding the size of each dimension
    const std::vector<std::uint32_t>& getDimensions() const noexcept { return dimensions; }

    // Returns the data type (IdxDataType Enum)
    IdxDataType getDataType() const noexcept { return dataType; }

    // Returns the vector holding the data
    std::vector<char>& getData() noexcept { return data; }
    // Read-only overload so the data can be inspected through a const Idx
    const std::vector<char>& getData() const noexcept { return data; }

    // Copy semantics
    Idx(const Idx&) = default;
    Idx& operator=(const Idx&) = default;

    // Move semantics
    Idx(Idx&&) noexcept = default;
    Idx& operator=(Idx&&) noexcept = default;

private:
    IdxDataType dataType;                   // element type code from the file header
    std::vector<char> data;                 // raw element bytes
    std::vector<std::uint32_t> dimensions;  // size of each dimension, first = rows
    std::size_t columns;                    // product of dimensions[1..]

    // Returns the size in bytes of one element of `dataType`.
    int dataTypeSize() const noexcept;
};
// Loads an IDX file into memory.
//
// Layout read by this constructor:
//   bytes 0-1 : zero (skipped)
//   byte  2   : element type code (IdxDataType)
//   byte  3   : number of dimensions
//   then one 4-byte big-endian size per dimension, followed by the data.
//
// The data bytes are stored exactly as they appear in the file; no byte-order
// conversion is applied to the elements themselves.
//
// Throws std::runtime_error when the file cannot be opened, the header is
// truncated or invalid, the total size does not fit in size_t, or the data
// section is shorter than the header announces.
inline Idx::Idx(IDXCPP_PATH path) {
    std::ifstream f(path, std::ios::binary);
    if (!f.is_open())
        throw std::runtime_error("Could not open the IDX file!");

    // Read the magic number. Unsigned bytes are used so that a dimension
    // count above 127 is not misread as a negative number.
    unsigned char mnumber[2];
    f.seekg(2); // first 2 bytes are always zero so they can be skipped
    f.read(reinterpret_cast<char*>(mnumber), 2);
    if (!f.good())
        throw std::runtime_error("Could not read the IDX magic number!");

    // 3rd byte is the data type; reject codes that are not part of the enum
    dataType = static_cast<IdxDataType>(mnumber[0]);
    switch (dataType) {
    case Unsigned_Byte:
    case Byte:
    case Short:
    case Int:
    case Float:
    case Double:
        break;
    default:
        throw std::runtime_error("Unsupported IDX data type!");
    }

    // 4th byte is the number of dimensions
    const std::size_t nDimensions = mnumber[1];
    if (nDimensions == 0)
        throw std::runtime_error("The IDX file declares no dimensions!");

    // Each dimension size is represented with 4 bytes. The stream is already
    // positioned at byte 4, right after the magic number.
    std::vector<unsigned char> rawDims(nDimensions * 4);
    f.read(reinterpret_cast<char*>(rawDims.data()),
           static_cast<std::streamsize>(rawDims.size()));
    if (!f.good())
        throw std::runtime_error("Could not read the IDX dimension sizes!");

    // Decode the big-endian sizes byte by byte so the result does not depend
    // on the byte order of the host. 1st dimension will be the rows.
    dimensions.resize(nDimensions);
    for (std::size_t d = 0; d < nDimensions; ++d) {
        const unsigned char* b = &rawDims[d * 4];
        dimensions[d] = (static_cast<std::uint32_t>(b[0]) << 24) |
                        (static_cast<std::uint32_t>(b[1]) << 16) |
                        (static_cast<std::uint32_t>(b[2]) << 8) |
                        static_cast<std::uint32_t>(b[3]);
    }

    // Multiplies two sizes, refusing to wrap around silently.
    const auto checkedMul = [](std::size_t a, std::size_t b) {
        if (b != 0 && a > std::numeric_limits<std::size_t>::max() / b)
            throw std::runtime_error("The IDX data size is too large!");
        return a * b;
    };

    // 2nd dimension and higher dimensions are represented together as columns
    columns = 1;
    for (std::size_t d = 1; d < nDimensions; ++d) {
        columns = checkedMul(columns, dimensions[d]);
    }

    const std::size_t typeSize = static_cast<std::size_t>(dataTypeSize());
    const std::size_t dataSize =
        checkedMul(checkedMul(typeSize, dimensions[0]), columns);

    // The data section starts right after the dimension sizes, which is where
    // the stream is positioned now.
    data.resize(dataSize);
    f.read(data.data(), static_cast<std::streamsize>(dataSize));
    if (!f)
        throw std::runtime_error("Could not read the IDX data!");
}
// Creates an accessor of scope 1 for index `i`, starting from the beginning
// of the data buffer. No bounds checking is performed on `i`.
inline IdxAccessor Idx::operator[](int i) {
    char* const bufferStart = data.data();
    return IdxAccessor{this, 1, i, bufferStart};
}
inline int Idx::dataTypeSize() const noexcept {
switch (dataType) {
case Unsigned_Byte:
return 1;
break;
case Byte:
return 1;
break;
case Short:
return 2;
break;
return 4;
}
}
// Builds a view for index `i` of the dimension just above `scope`.
//
// idx   : object whose data is viewed (must not be null)
// scope : index of the first dimension covered by the new view
// i     : selected index; no bounds checking is performed
// ptr   : start of the region that is being subdivided
//
// `size` becomes the number of elements covered by the view (the product of
// the dimension sizes from `scope` onwards). The start pointer is advanced by
// that many elements times the size in bytes of one element, because `ptr`
// addresses bytes while `size` counts elements.
inline IdxAccessor::IdxAccessor(Idx* idx, int scope, int i, char* ptr)
    : idx(idx), scope(scope), ptr(ptr), size(1) {
    const std::vector<std::uint32_t>& dims = idx->getDimensions();
    // A negative scope converts to a huge unsigned value, so the loop is
    // skipped and `size` stays 1.
    for (std::size_t j = static_cast<std::size_t>(scope); j < dims.size(); ++j) {
        size *= dims[j];
    }
    const std::ptrdiff_t elementBytes = idx->dataTypeSize();
    this->ptr += static_cast<std::ptrdiff_t>(i) *
                 static_cast<std::ptrdiff_t>(size) * elementBytes;
}
// Debug helper: writes the first `size` bytes of the view to std::cout as
// characters, one per line, flushing after each one.
inline void IdxAccessor::printData() const noexcept {
    const char* cursor = ptr;
    const char* const stop = ptr + size;
    while (cursor != stop) {
        std::cout << *cursor << std::endl;
        ++cursor;
    }
}
// Descends one dimension: returns an accessor for index `i` whose scope is
// one greater than this one's and which starts from this view's pointer.
inline IdxAccessor IdxAccessor::operator[](int i) {
    const int childScope = scope + 1;
    return IdxAccessor{idx, childScope, i, ptr};
}
}