Skip to content

Commit 0ff7b97

Browse files
ChALkeR and mertcanaltin committed
src: move all 1-byte encodings to native
Co-authored-by: Mert Can Altin <mertgold60@gmail.com>
1 parent 77e8d44 commit 0ff7b97

8 files changed

Lines changed: 524 additions & 160 deletions

File tree

lib/internal/encoding.js

Lines changed: 47 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
// https://encoding.spec.whatwg.org
55

66
const {
7+
ArrayPrototypeMap,
78
Boolean,
89
ObjectDefineProperties,
910
ObjectGetOwnPropertyDescriptors,
1011
ObjectSetPrototypeOf,
1112
ObjectValues,
13+
SafeArrayIterator,
1214
SafeMap,
1315
StringPrototypeSlice,
1416
Symbol,
@@ -32,8 +34,6 @@ const kFatal = Symbol('kFatal');
3234
const kUTF8FastPath = Symbol('kUTF8FastPath');
3335
const kIgnoreBOM = Symbol('kIgnoreBOM');
3436

35-
const { isSinglebyteEncoding, createSinglebyteDecoder } = require('internal/encoding/single-byte');
36-
3737
const {
3838
getConstructorOf,
3939
customInspectSymbol: inspect,
@@ -58,6 +58,7 @@ const {
5858
encodeIntoResults,
5959
encodeUtf8String,
6060
decodeUTF8,
61+
decodeSingleByte,
6162
} = binding;
6263

6364
function validateDecoder(obj) {
@@ -71,6 +72,47 @@ const CONVERTER_FLAGS_IGNORE_BOM = 0x4;
7172

7273
const empty = new FastBuffer();
7374

75+
// Single-byte encoding labels handled by the native decodeSingleByte binding.
// The position of a label in this list is the numeric key handed to the
// binding, so the order has to stay in sync with the native side in src/.
const singlebyteEncodingNames = [
  'ibm866',
  'koi8-r',
  'koi8-u',
  'macintosh',
  'x-mac-cyrillic',
  'iso-8859-2',
  'iso-8859-3',
  'iso-8859-4',
  'iso-8859-5',
  'iso-8859-6',
  'iso-8859-7',
  'iso-8859-8',
  'iso-8859-8-i',
  'iso-8859-10',
  'iso-8859-13',
  'iso-8859-14',
  'iso-8859-15',
  'iso-8859-16',
  'windows-874',
  'windows-1250',
  'windows-1251',
  'windows-1252',
  'windows-1253',
  'windows-1254',
  'windows-1255',
  'windows-1256',
  'windows-1257',
  'windows-1258',
  'x-user-defined', // Has to be last, special case
];

// Maps each label to its index in the list above.
const encodingsSinglebyte = new SafeMap(
  new SafeArrayIterator(
    ArrayPrototypeMap(singlebyteEncodingNames, (name, index) => [name, index]),
  ),
);
107+
108+
// Returns true when `enc` is one of the labels registered in
// encodingsSinglebyte (exact match on the key, no normalization here).
const isSinglebyteEncoding = (enc) => {
  return encodingsSinglebyte.has(enc);
};
109+
110+
/**
 * Builds a decoder function bound to one single-byte encoding.
 * @param {string} encoding Label that must be a key of encodingsSinglebyte.
 * @param {boolean} fatal Forwarded unchanged to the native decoder.
 * @returns {Function} Takes the input buffer and returns whatever the native
 *   decodeSingleByte binding returns for it.
 * @throws {ERR_ENCODING_NOT_SUPPORTED} When the label is not registered.
 */
function createSinglebyteDecoder(encoding, fatal) {
  // The numeric index is resolved once, not on every decode call.
  const encodingIndex = encodingsSinglebyte.get(encoding);
  if (encodingIndex === undefined) {
    throw new ERR_ENCODING_NOT_SUPPORTED(encoding);
  }

  return (input) => decodeSingleByte(input, encodingIndex, fatal);
}
115+
74116
const encodings = new SafeMap([
75117
['unicode-1-1-utf-8', 'utf-8'],
76118
['unicode11utf8', 'utf-8'],
@@ -462,7 +504,7 @@ function makeTextDecoderICU() {
462504
validateDecoder(this);
463505
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
464506

465-
if (this[kMethod]) return this[kMethod](parseInput(input));
507+
if (this[kMethod]) return this[kMethod](input);
466508

467509
this[kUTF8FastPath] &&= !(options?.stream);
468510

@@ -532,11 +574,12 @@ function makeTextDecoderJS() {
532574

533575
decode(input = empty, options = kEmptyObject) {
534576
validateDecoder(this);
535-
input = parseInput(input);
536577
validateObject(options, 'options', kValidateObjectAllowObjectsAndNull);
537578

538579
if (this[kMethod]) return this[kMethod](input);
539580

581+
input = parseInput(input);
582+
540583
if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
541584
this[kBOMSeen] = false;
542585
}

lib/internal/encoding/single-byte.js

Lines changed: 0 additions & 155 deletions
This file was deleted.

src/encoding_binding.cc

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#include "encoding_binding.h"
22
#include "ada.h"
3+
#include "encoding_singlebyte.h"
34
#include "env-inl.h"
45
#include "node_errors.h"
56
#include "node_external_reference.h"
@@ -391,6 +392,75 @@ void BindingData::DecodeUTF8(const FunctionCallbackInfo<Value>& args) {
391392
}
392393
}
393394

395+
void BindingData::DecodeSingleByte(const FunctionCallbackInfo<Value>& args) {
396+
Environment* env = Environment::GetCurrent(args);
397+
398+
CHECK_GE(args.Length(), 2);
399+
Isolate* isolate = env->isolate();
400+
401+
if (!(args[0]->IsArrayBuffer() || args[0]->IsSharedArrayBuffer() ||
402+
args[0]->IsArrayBufferView())) {
403+
return node::THROW_ERR_INVALID_ARG_TYPE(
404+
isolate,
405+
"The \"input\" argument must be an instance of SharedArrayBuffer, "
406+
"ArrayBuffer or ArrayBufferView.");
407+
}
408+
409+
static constexpr int kXUserDefined = 28; // Last one, see encoding.js
410+
411+
CHECK(args[1]->IsInt32());
412+
const int encoding = args[1].As<v8::Int32>()->Value();
413+
CHECK(encoding >= 0 && encoding <= kXUserDefined);
414+
415+
ArrayBufferViewContents<uint8_t> buffer(args[0]);
416+
const uint8_t* data = buffer.data();
417+
size_t length = buffer.length();
418+
419+
if (length == 0) return args.GetReturnValue().SetEmptyString();
420+
421+
const char* dataChar = reinterpret_cast<const char*>(data);
422+
if (!simdutf::validate_ascii_with_errors(dataChar, length).error) {
423+
Local<Value> ret;
424+
if (StringBytes::Encode(isolate, dataChar, length, LATIN1).ToLocal(&ret)) {
425+
args.GetReturnValue().Set(ret);
426+
}
427+
return;
428+
}
429+
430+
if (length > static_cast<size_t>(v8::String::kMaxLength)) {
431+
isolate->ThrowException(ERR_STRING_TOO_LONG(isolate));
432+
return;
433+
}
434+
435+
uint16_t* dst = node::UncheckedMalloc<uint16_t>(length);
436+
if (dst == nullptr) return node::THROW_ERR_MEMORY_ALLOCATION_FAILED(isolate);
437+
438+
if (encoding == kXUserDefined) {
439+
// x-user-defined
440+
for (size_t i = 0; i < length; i++) {
441+
dst[i] = data[i] >= 0x80 ? data[i] + 0xf700 : data[i];
442+
}
443+
} else {
444+
bool has_fatal = args[2]->IsTrue();
445+
446+
const uint16_t* table = tSingleByteEncodings[encoding];
447+
for (size_t i = 0; i < length; i++) dst[i] = table[data[i]];
448+
449+
const char16_t* dst16 = reinterpret_cast<char16_t*>(dst);
450+
if (has_fatal && fSingleByteEncodings[encoding] &&
451+
simdutf::find(dst16, dst16 + length, 0xfffd) != dst16 + length) {
452+
free(dst);
453+
return node::THROW_ERR_ENCODING_INVALID_ENCODED_DATA(
454+
isolate, "The encoded data was not valid for this encoding");
455+
}
456+
}
457+
458+
Local<Value> ret;
459+
if (StringBytes::Raw(isolate, dst, length).ToLocal(&ret)) {
460+
args.GetReturnValue().Set(ret);
461+
}
462+
}
463+
394464
void BindingData::ToASCII(const FunctionCallbackInfo<Value>& args) {
395465
Environment* env = Environment::GetCurrent(args);
396466
CHECK_GE(args.Length(), 1);
@@ -423,6 +493,7 @@ void BindingData::CreatePerIsolateProperties(IsolateData* isolate_data,
423493
SetMethod(isolate, target, "encodeInto", EncodeInto);
424494
SetMethodNoSideEffect(isolate, target, "encodeUtf8String", EncodeUtf8String);
425495
SetMethodNoSideEffect(isolate, target, "decodeUTF8", DecodeUTF8);
496+
SetMethodNoSideEffect(isolate, target, "decodeSingleByte", DecodeSingleByte);
426497
SetMethodNoSideEffect(isolate, target, "toASCII", ToASCII);
427498
SetMethodNoSideEffect(isolate, target, "toUnicode", ToUnicode);
428499
}
@@ -440,6 +511,7 @@ void BindingData::RegisterTimerExternalReferences(
440511
registry->Register(EncodeInto);
441512
registry->Register(EncodeUtf8String);
442513
registry->Register(DecodeUTF8);
514+
registry->Register(DecodeSingleByte);
443515
registry->Register(ToASCII);
444516
registry->Register(ToUnicode);
445517
}

0 commit comments

Comments
 (0)