-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlibrary.cpp
More file actions
94 lines (76 loc) · 2.76 KB
/
library.cpp
File metadata and controls
94 lines (76 loc) · 2.76 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#include "library.h"
#include "sentencepiece_processor.h"
#include <vector>
#include <cstring>
#include <iostream>
#include "sentencepiece_trainer.h"
/// Creates a default-constructed SentencePieceProcessor on the heap.
/// @return The new processor as an opaque pointer; release it with spp_destroy().
void* spp_create() {
    auto* handle = new sentencepiece::SentencePieceProcessor();
    return handle;
}
/// Loads a model into the processor via SentencePieceProcessor::Load().
/// @param processor  Opaque handle obtained from spp_create().
/// @param model_file NUL-terminated name of the model file passed to Load().
/// @return 0 when Load() reports an ok status; 1 on any failure, including
///         a null processor or a null model_file.
int spp_load(void* processor, const char* model_file) {
    // Building a string view from a null char pointer (or dereferencing a
    // null handle) is undefined behaviour, so reject both up front using the
    // same error code as a failed Load().
    if (processor == nullptr || model_file == nullptr) {
        return 1;
    }
    const auto sp = static_cast<sentencepiece::SentencePieceProcessor*>(processor);
    const auto status = sp->Load(model_file);
    return status.ok() ? 0 : 1;
}
/// Encodes a text into piece ids and hands them back in a freshly allocated array.
/// @param processor Opaque handle obtained from spp_create().
/// @param input     NUL-terminated text to encode.
/// @param output    Receives a pointer to a new int[] holding the ids. The
///                  caller releases it with spp_free_array(). Set to nullptr
///                  whenever the function returns -1.
/// @return Number of ids written, or -1 if an argument is null or Encode()
///         reports a non-ok status.
int spp_encode(void* processor, const char* input, int** output) {
    if (output == nullptr) {
        return -1;
    }
    // Never leave the caller's pointer indeterminate on a failure path, so a
    // subsequent spp_free_array() on it is always safe.
    *output = nullptr;
    if (processor == nullptr || input == nullptr) {
        return -1;
    }

    const auto sp = static_cast<sentencepiece::SentencePieceProcessor*>(processor);
    std::vector<int> ids;
    if (const auto status = sp->Encode(input, &ids); !status.ok()) {
        return -1;
    }

    const std::size_t count = ids.size();
    int* buffer = new int[count];
    // An empty vector may report a null data() pointer, and passing null to
    // memcpy is undefined even for a zero-byte copy.
    if (count != 0) {
        std::memcpy(buffer, ids.data(), count * sizeof(int));
    }
    *output = buffer;
    return static_cast<int>(count);
}
/// Releases an int array with delete[].
/// Matches the new int[] allocation performed by spp_encode() for its output
/// buffer. A null pointer is accepted and ignored.
/// @param arr Array to release.
void spp_free_array(const int* arr) {
    if (arr != nullptr) {
        delete[] arr;
    }
}
/// Destroys a processor by casting the opaque pointer back to
/// SentencePieceProcessor and deleting it.
/// @param processor Opaque handle, as produced by spp_create().
void spp_destroy(void* processor) {
    delete static_cast<sentencepiece::SentencePieceProcessor*>(processor);
}
/// Forwards to SentencePieceProcessor::GetPieceSize().
/// @param processor Opaque handle pointing at a SentencePieceProcessor.
/// @return The value reported by GetPieceSize().
int spp_get_piece_size(void* processor) {
    auto* const tokenizer =
        static_cast<sentencepiece::SentencePieceProcessor*>(processor);
    const int piece_count = tokenizer->GetPieceSize();
    return piece_count;
}
/// Forwards to SentencePieceProcessor::PieceToId().
/// @param processor Opaque handle pointing at a SentencePieceProcessor.
/// @param piece     NUL-terminated piece text to look up.
/// @return The id reported by PieceToId() for the given piece.
int spp_piece_to_id(void* processor, const char *piece) {
    auto* const tokenizer =
        static_cast<sentencepiece::SentencePieceProcessor*>(processor);
    const int id = tokenizer->PieceToId(piece);
    return id;
}
/// Returns the piece text for an id as a NUL-terminated C string.
/// @param processor Opaque handle pointing at a SentencePieceProcessor.
/// @param piece_id  Id forwarded to SentencePieceProcessor::IdToPiece().
/// @return Pointer into a per-thread buffer owned by this function. It stays
///         valid only until the next spp_id_to_piece() call on the same
///         thread; callers that need the text longer must copy it. The caller
///         must not free it.
const char* spp_id_to_piece(void *processor, int piece_id) {
    // The buffer is thread_local rather than a plain static: a single shared
    // std::string would be written concurrently by callers on different
    // threads, racing on the buffer and invalidating each other's pointers.
    static thread_local std::string last_piece;
    const auto sp = static_cast<sentencepiece::SentencePieceProcessor*>(processor);
    last_piece = sp->IdToPiece(piece_id);
    return last_piece.c_str();
}
/// Forwards to SentencePieceProcessor::IsUnknown().
/// @param processor Opaque handle pointing at a SentencePieceProcessor.
/// @param piece     Piece id to query.
/// @return The result of IsUnknown() for the given id.
bool spp_is_unknown(void *processor, const int piece) {
    auto* const tokenizer =
        static_cast<sentencepiece::SentencePieceProcessor*>(processor);
    const bool unknown = tokenizer->IsUnknown(piece);
    return unknown;
}
/// Forwards to SentencePieceProcessor::IsControl().
/// @param processor Opaque handle pointing at a SentencePieceProcessor.
/// @param piece     Piece id to query.
/// @return The result of IsControl() for the given id.
bool spp_is_control(void *processor, const int piece) {
    auto* const tokenizer =
        static_cast<sentencepiece::SentencePieceProcessor*>(processor);
    const bool control = tokenizer->IsControl(piece);
    return control;
}
int spp_bos_id(void *processor) {
const auto sp = static_cast<sentencepiece::SentencePieceProcessor*>(processor);
return sp->bos_id();
}
int spp_eos_id(void *processor) {
const auto sp = static_cast<sentencepiece::SentencePieceProcessor*>(processor);
return sp->eos_id();
}
int spp_pad_id(void *processor) {
const auto sp = static_cast<sentencepiece::SentencePieceProcessor*>(processor);
return sp->pad_id();
}
int spp_unk_id(void *processor) {
const auto sp = static_cast<sentencepiece::SentencePieceProcessor*>(processor);
return sp->unk_id();
}
/// Forwards an option string to SentencePieceProcessor::SetEncodeExtraOptions().
/// @param processor Opaque handle pointing at a SentencePieceProcessor.
/// @param piece     NUL-terminated option string handed to the processor
///                  unchanged (despite its name, it is used as the options
///                  argument, not as a piece).
/// Whatever SetEncodeExtraOptions() returns is not inspected, so this wrapper
/// gives the caller no indication of whether the options were accepted.
void spp_set_encode_extra_options(void* processor, const char *piece) {
    auto* const tokenizer =
        static_cast<sentencepiece::SentencePieceProcessor*>(processor);
    tokenizer->SetEncodeExtraOptions(piece);
}