From 99631e8de8421da6aaed07d0e2b04835308cc09d Mon Sep 17 00:00:00 2001 From: Mitchel Disveld Date: Tue, 5 Aug 2025 17:18:56 +0200 Subject: [PATCH 1/2] restore charset --- Source/Atlas/FontGeometry.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Atlas/FontGeometry.cs b/Source/Atlas/FontGeometry.cs index bbd6899..8a4df24 100644 --- a/Source/Atlas/FontGeometry.cs +++ b/Source/Atlas/FontGeometry.cs @@ -122,7 +122,7 @@ public int LoadGlyphset(Typeface face, double fontScale, Charset glyphset, bool /// /// Loads all glyphs in a charset (Charset elements are Unicode codepoints), returns the number of successfully loaded glyphs /// - public int LoadCharset(Typeface face, double fontScale, ReadOnlySpan charset, bool preprocessGeometry = true, bool enableKerning = true) + public int LoadCharset(Typeface face, double fontScale, Charset charset, bool preprocessGeometry = true, bool enableKerning = true) { if (!(glyphs.Count == rangeEnd && LoadMetrics(face, fontScale))) return -1; From 65fb5a6a00b9e30f15a0d3f1e23ffd617f49096c Mon Sep 17 00:00:00 2001 From: Mitchel Disveld Date: Tue, 5 Aug 2025 18:45:51 +0200 Subject: [PATCH 2/2] from heap to stack and separated concerns charset & parser --- Source/Atlas/Charset.cs | 113 +++-- Source/Atlas/CharsetParser.cs | 764 +++++++++++++++++++--------------- 2 files changed, 504 insertions(+), 373 deletions(-) diff --git a/Source/Atlas/Charset.cs b/Source/Atlas/Charset.cs index a61968e..5709998 100644 --- a/Source/Atlas/Charset.cs +++ b/Source/Atlas/Charset.cs @@ -1,44 +1,89 @@ -using System; -using System.Collections; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; - -namespace SharpMSDF.Atlas +using System.Collections; + +namespace SharpMSDF.Atlas; +/// +/// Represents a set of Unicode codepoints (characters) +/// +public struct Charset { - /// Represents a set of Unicode codepoints (characters) - public partial class Charset : IEnumerable - { - /// The set of the 95 printable ASCII characters - public readonly static Charset ASCII = CreateAsciiCharset(); + private SortedSet _codepoints; + + /// + /// The set of the 95 printable ASCII characters + /// + public static readonly SortedSet ASCII = CreateAsciiCharset(); + + private static SortedSet CreateAsciiCharset() + { + var ascii = new SortedSet(); + for (uint cp = 0x20; cp < 0x7f; ++cp) + { + ascii.Add(cp); + } + return ascii; + } + + public Charset() + { + _codepoints = new SortedSet(); + } + + public Charset(SortedSet codepoints) + { + _codepoints = codepoints ?? new SortedSet(); + } - static Charset CreateAsciiCharset() - { - Charset ascii = new(); - for (uint cp = 0x20; cp < 0x7f; ++cp) - ascii.Add(cp); - return ascii; - } + /// + /// Adds a codepoint + /// + public void Add(uint cp) + { + _codepoints.Add(cp); + } - /// - /// Adds a codepoint - /// - public void Add(uint cp) => _Codepoints.Add(cp); - /// - /// Removes a codepoint - /// - public void Remove(uint cp) => _Codepoints.Remove(cp); + /// + /// Removes a codepoint + /// + public void Remove(uint cp) + { + _codepoints.Remove(cp); + } - public int Size() => _Codepoints.Count; - public bool Empty() => _Codepoints.Count == 0; + public int Size() + { + return _codepoints.Count; + } - IEnumerator IEnumerable.GetEnumerator() => _Codepoints.GetEnumerator(); + public bool Empty() + { + return _codepoints.Count == 0; + } - IEnumerator IEnumerable.GetEnumerator() => _Codepoints.GetEnumerator(); + public SortedSet.Enumerator GetEnumerator() + { + return _codepoints.GetEnumerator(); + } - private SortedSet _Codepoints = []; + public SortedSet GetCodepoints() + { + return _codepoints; + } - }; + // Implicit conversion from ReadOnlySpan to Charset + public static implicit operator Charset(ReadOnlySpan chars) + { + var charset = new Charset(); + foreach (char ch in chars) + charset.Add(ch); + return charset; + } + // Implicit conversion from ReadOnlySpan to Charset + public static implicit operator Charset(ReadOnlySpan codepoints) + { + var charset = new Charset(); + foreach (uint cp in codepoints) + charset.Add(cp); + return charset; + } } diff --git a/Source/Atlas/CharsetParser.cs b/Source/Atlas/CharsetParser.cs index db68ffb..6afe8f8 100644 --- a/Source/Atlas/CharsetParser.cs +++ b/Source/Atlas/CharsetParser.cs @@ -1,341 +1,427 @@ -using System; -using System.Collections.Generic; -using System.IO; -using System.Linq; -using System.Runtime.CompilerServices; -using System.Text; -using System.Threading.Tasks; - -namespace SharpMSDF.Atlas +using System.Text; + +namespace SharpMSDF.Atlas; + +/// +/// Static utility class for parsing charset files and strings +/// +public static class CharsetParser { - public partial class Charset - { - - public bool Load(string filename, bool disableCharLiterals = false) - { - using FileStream fs = File.OpenRead(filename); - CharsetUserData userData = new(this, filename, disableCharLiterals, fs); - return CharsetParse(userData, CharsetUserData.ReadChar, CharsetUserData .Add, CharsetUserData.Include, disableCharLiterals, false); - } - - public unsafe bool Parse(string str, bool disableCharLiterals = false) - { - fixed (char* chars = str) - { - CharsetUserData userData = new(this, &chars[0], &chars[str.Length]); - return CharsetParse(userData, CharsetUserData.ReadChar, CharsetUserData.Add, CharsetUserData.Include, disableCharLiterals, true); - } - } - - public static char EscapedChar(char c) => c switch - { - '0' => '\0', - 'n' or 'N' => '\n', - 'r' or 'R' => '\r', - 's' or 'S' => ' ', - 't' or 'T' => '\t', - _ => c - }; - - public static bool ParseInt(string str, out int result) - { - result = 0; - if (str.StartsWith("0x", StringComparison.OrdinalIgnoreCase)) - { - for (int i = 2; i < str.Length; ++i) - { - char c = str[i]; - if (c is >= '0' and <= '9') - result = (result << 4) + (c - '0'); - else if (c is >= 'A' and <= 'F') - result = (result << 4) + (c - 'A' + 10); - else if (c is >= 'a' and <= 'f') - result = (result << 4) + (c - 'a' + 10); - else - return false; - } - return true; - } - else - { - foreach (char c in str) - { - if (c is >= '0' and <= '9') - result = result * 10 + (c - '0'); - else - return false; - } - return true; - } - } - - private enum State - { - Clear, - Tight, - RangeBracket, - RangeStart, - RangeSeparator, - RangeEnd - } - - public static bool CharsetParse( - CharsetUserData userData, - ReadCharFunc readChar, - Action add, - Func include, - bool disableCharLiterals, - bool disableInclude - ) - { - State state = State.Clear; - var buffer = new StringBuilder(); - var unicodeBuffer = new List(); - uint rangeStart = 0; - bool start = true; - - for (int c = readChar(ref userData); c >= 0; start = false, c = readChar(ref userData)) - { - switch (c) - { - // --- Number literal --- - case >= '0' and <= '9': - if (state is not (State.Clear or State.RangeBracket or State.RangeSeparator)) - return false; - buffer.Append((char)c); - c = ReadWord(readChar, userData, buffer); - if (!ParseInt(buffer.ToString(), out int cp)) - return false; - switch (state) - { - case State.Clear: - if (cp >= 0) add(userData, (uint)cp); - state = State.Tight; - break; - case State.RangeBracket: - rangeStart = (uint)cp; - state = State.RangeStart; - break; - case State.RangeSeparator: - for (uint u = rangeStart; u <= (uint)cp; ++u) - add(userData, u); - state = State.RangeEnd; - break; - } - buffer.Clear(); - continue; // already have next c - - // --- Single char literal --- - case '\'': - if (!(state is State.Clear or State.RangeBracket or State.RangeSeparator) || disableCharLiterals) - return false; - if (!ReadString(readChar, userData, buffer, '\'')) - return false; - Utf8.Utf8Decode(unicodeBuffer, buffer.ToString()); - if (unicodeBuffer.Count != 1) - return false; - uint uc = unicodeBuffer[0]; - switch (state) - { - case State.Clear: - if (uc > 0) add(userData, uc); - state = State.Tight; - break; - case State.RangeBracket: - rangeStart = uc; - state = State.RangeStart; - break; - case State.RangeSeparator: - for (uint u = rangeStart; u <= uc; ++u) - add(userData, u); - state = State.RangeEnd; - break; - } - unicodeBuffer.Clear(); - buffer.Clear(); - break; - - // --- String literal --- - case '"': - if (state != State.Clear || disableCharLiterals) - return false; - if (!ReadString(readChar, userData, buffer, '"')) - return false; - Utf8.Utf8Decode(unicodeBuffer, buffer.ToString()); - foreach (var cp2 in unicodeBuffer) - add(userData, cp2); - unicodeBuffer.Clear(); - buffer.Clear(); - state = State.Tight; - break; - - // --- Range brackets --- - case '[': - if (state != State.Clear) return false; - state = State.RangeBracket; - break; - case ']': - if (state == State.RangeEnd) state = State.Tight; - else return false; - break; - - // --- Include directive --- - case '@': - if (state != State.Clear) return false; - c = ReadWord(readChar, userData, buffer); - if (buffer.ToString() == "include") - { - // skip whitespace - while (c is ' ' or '\t' or '\n' or '\r') - c = readChar(ref userData); - if (c != '"') return false; - buffer.Clear(); - if (!ReadString(readChar, userData, buffer, '"')) - return false; - if (!disableInclude) - include(userData, buffer.ToString()); - state = State.Tight; - } - else return false; - buffer.Clear(); - break; - - // --- Separators & whitespace --- - case ',': - case ';': - if (state is State.RangeStart) - state = State.RangeSeparator; - else if (state is not (State.Clear or State.Tight)) - return false; - goto case ' '; - case ' ': - case '\n': - case '\r': - case '\t': - if (state == State.Tight) - state = State.Clear; - break; - - // --- BOM at start --- - case 0xEF: - if (start && - readChar(ref userData) == 0xBB && - readChar(ref userData) == 0xBF) - { - break; - } - return false; - - // --- Anything else is error --- - default: - return false; - } - } - - return state == State.Clear || state == State.Tight; - } - - - private static bool ReadString(ReadCharFunc readChar, CharsetUserData userData, StringBuilder buffer, char terminator) - { - bool escape = false; - while (true) - { - int ci = readChar(ref userData); - if (ci < 0) return false; - char c = (char)ci; - if (escape) - { - buffer.Append(EscapedChar(c)); - escape = false; - } - else - { - if (c == terminator) return true; - if (c == '\\') { escape = true; } - else buffer.Append(c); - } - } - } - - public static string CombinePath(string basePath, string relPath) - { - if (Path.IsPathRooted(relPath)) - return relPath; - - string? dir = Path.GetDirectoryName(basePath); - return dir != null ? Path.Combine(dir, relPath) : relPath; - } - - private static int ReadWord(ReadCharFunc readChar, CharsetUserData userData, StringBuilder buffer) - { - while (true) - { - int c = readChar(ref userData); - if (char.IsLetterOrDigit((char)c) || c == '_') - buffer.Append((char)c); - else - return c; - } - } - } - - public delegate int ReadCharFunc(ref CharsetUserData charset); - - public unsafe struct CharsetUserData - { - public Charset Charset { get; set; } - - public readonly bool IsParseNotLoad; - - // Parse - public char* Cur; - public char* End; - - // Load - public string Filename; - public bool DisableCharLiterals; - public FileStream File; - - public CharsetUserData(Charset charset, char* cur, char* end) - { - IsParseNotLoad = true; - Charset = charset; - Cur = cur; - End = end; - } - public CharsetUserData(Charset charset, string filename, bool disableCharLiterals, FileStream file) - { - IsParseNotLoad = false; - Charset = charset; - Filename = filename; - DisableCharLiterals = disableCharLiterals; - File = file; - } - - public static int ReadChar(ref CharsetUserData ud) - { - if (ud.IsParseNotLoad) - { - // sus - return ud.Cur < ud.End ? *ud.Cur++ : -1; - } - - return ud.File.ReadByte(); - } - - public static void Add(CharsetUserData ud, uint codepoint) - { - ud.Charset.Add(codepoint); - } - - public static bool Include(CharsetUserData ud, string path) - { - if (ud.IsParseNotLoad) - return false; - - string fullPath = Charset.CombinePath(ud.Filename, path); - return ud.Charset.Load(fullPath, ud.DisableCharLiterals); - } - } + private enum State + { + Clear, + Tight, + RangeBracket, + RangeStart, + RangeSeparator, + RangeEnd + } + + public delegate int ReadCharFunc(ref T userData); + + public unsafe struct ParseUserData + { + public char* Cur; + public char* End; + + public ParseUserData(char* cur, char* end) + { + Cur = cur; + End = end; + } + + public static int ReadChar(ref ParseUserData ud) + { + return ud.Cur < ud.End ? *ud.Cur++ : -1; + } + } + + public readonly struct LoadUserData + { + public readonly string Filename; + public readonly bool DisableCharLiterals; + public readonly FileStream File; + + public LoadUserData(string filename, bool disableCharLiterals, FileStream file) + { + Filename = filename; + DisableCharLiterals = disableCharLiterals; + File = file; + } + + public static int ReadChar(ref LoadUserData ud) + { + return ud.File.ReadByte(); + } + } + + /// + /// Loads a charset from a file + /// + public static bool LoadFromFile(ref Charset charset, string filename, bool disableCharLiterals = false) + { + using var fs = File.OpenRead(filename); + var userData = new LoadUserData(filename, disableCharLiterals, fs); + return Parse(ref charset, ref userData, LoadUserData.ReadChar, disableCharLiterals, false); + } + + /// + /// Parses a charset from a string + /// + public static unsafe bool ParseFromString(ref Charset charset, string str, bool disableCharLiterals = false) + { + fixed (char* chars = str) + { + var userData = new ParseUserData(&chars[0], &chars[str.Length]); + return Parse(ref charset, ref userData, ParseUserData.ReadChar, disableCharLiterals, true); + } + } + + /// + /// Core parsing logic + /// + private static bool Parse( + ref Charset charset, + ref T userData, + ReadCharFunc readChar, + bool disableCharLiterals, + bool disableInclude) + { + var state = State.Clear; + var buffer = new StringBuilder(); + var unicodeBuffer = new List(); + uint rangeStart = 0; + bool start = true; + + for (int c = readChar(ref userData); c >= 0; start = false, c = readChar(ref userData)) + { + switch (c) + { + // --- Number literal --- + case >= '0' and <= '9': + if (state is not (State.Clear or State.RangeBracket or State.RangeSeparator)) + { + return false; + } + + buffer.Append((char)c); + c = ReadWord(readChar, ref userData, buffer); + if (!ParseInt(buffer.ToString(), out int cp)) + { + return false; + } + + switch (state) + { + case State.Clear: + if (cp >= 0) + { + charset.Add((uint)cp); + } + state = State.Tight; + break; + case State.RangeBracket: + rangeStart = (uint)cp; + state = State.RangeStart; + break; + case State.RangeSeparator: + for (uint u = rangeStart; u <= (uint)cp; ++u) + { + charset.Add(u); + } + state = State.RangeEnd; + break; + } + buffer.Clear(); + continue; // already have next c + + // --- Single char literal --- + case '\'': + if (!(state is State.Clear or State.RangeBracket or State.RangeSeparator) || disableCharLiterals) + { + return false; + } + + if (!ReadString(readChar, ref userData, buffer, '\'')) + { + return false; + } + + Utf8.Utf8Decode(unicodeBuffer, buffer.ToString()); + if (unicodeBuffer.Count != 1) + { + return false; + } + + uint uc = unicodeBuffer[0]; + switch (state) + { + case State.Clear: + if (uc > 0) + { + charset.Add(uc); + } + state = State.Tight; + break; + case State.RangeBracket: + rangeStart = uc; + state = State.RangeStart; + break; + case State.RangeSeparator: + for (uint u = rangeStart; u <= uc; ++u) + { + charset.Add(u); + } + state = State.RangeEnd; + break; + } + unicodeBuffer.Clear(); + buffer.Clear(); + break; + + // --- String literal --- + case '"': + if (state != State.Clear || disableCharLiterals) + { + return false; + } + + if (!ReadString(readChar, ref userData, buffer, '"')) + { + return false; + } + + Utf8.Utf8Decode(unicodeBuffer, buffer.ToString()); + foreach (uint cp2 in unicodeBuffer) + { + charset.Add(cp2); + } + + unicodeBuffer.Clear(); + buffer.Clear(); + state = State.Tight; + break; + + // --- Range brackets --- + case '[': + if (state != State.Clear) + { + return false; + } + state = State.RangeBracket; + break; + case ']': + if (state == State.RangeEnd) + { + state = State.Tight; + } + else + { + return false; + } + break; + + // --- Include directive --- + case '@': + if (state != State.Clear) + { + return false; + } + + c = ReadWord(readChar, ref userData, buffer); + if (buffer.ToString() == "include") + { + // skip whitespace + while (c is ' ' or '\t' or '\n' or '\r') + { + c = readChar(ref userData); + } + + if (c != '"') + { + return false; + } + + buffer.Clear(); + if (!ReadString(readChar, ref userData, buffer, '"')) + { + return false; + } + + if (!disableInclude && userData is LoadUserData loadData) + { + string fullPath = CombinePath(loadData.Filename, buffer.ToString()); + LoadFromFile(ref charset, fullPath, loadData.DisableCharLiterals); + } + + state = State.Tight; + } + else + { + return false; + } + + buffer.Clear(); + break; + + // --- Separators & whitespace --- + case ',': + case ';': + if (state is State.RangeStart) + { + state = State.RangeSeparator; + } + else if (state is not (State.Clear or State.Tight)) + { + return false; + } + goto case ' '; + case ' ': + case '\n': + case '\r': + case '\t': + if (state == State.Tight) + { + state = State.Clear; + } + break; + + // --- BOM at start --- + case 0xEF: + if (start && + readChar(ref userData) == 0xBB && + readChar(ref userData) == 0xBF) + { + break; + } + return false; + + // --- Anything else is error --- + default: + return false; + } + } + + return state is State.Clear or State.Tight; + } + + public static char EscapedChar(char c) + { + return c switch + { + '0' => '\0', + 'n' or 'N' => '\n', + 'r' or 'R' => '\r', + 's' or 'S' => ' ', + 't' or 'T' => '\t', + _ => c + }; + } + + public static bool ParseInt(string str, out int result) + { + result = 0; + if (str.StartsWith("0x", StringComparison.OrdinalIgnoreCase)) + { + for (int i = 2; i < str.Length; ++i) + { + char c = str[i]; + if (c is >= '0' and <= '9') + { + result = (result << 4) + (c - '0'); + } + else if (c is >= 'A' and <= 'F') + { + result = (result << 4) + c - 'A' + 10; + } + else if (c is >= 'a' and <= 'f') + { + result = (result << 4) + c - 'a' + 10; + } + else + { + return false; + } + } + return true; + } + else + { + foreach (char c in str) + { + if (c is >= '0' and <= '9') + { + result = (result * 10) + (c - '0'); + } + else + { + return false; + } + } + return true; + } + } + + private static bool ReadString(ReadCharFunc readChar, ref T userData, StringBuilder buffer, char terminator) + { + bool escape = false; + while (true) + { + int ci = readChar(ref userData); + if (ci < 0) + { + return false; + } + + char c = (char)ci; + if (escape) + { + buffer.Append(EscapedChar(c)); + escape = false; + } + else + { + if (c == terminator) + { + return true; + } + + if (c == '\\') + { + escape = true; + } + else + { + buffer.Append(c); + } + } + } + } + + public static string CombinePath(string basePath, string relPath) + { + if (Path.IsPathRooted(relPath)) + { + return relPath; + } + + string? dir = Path.GetDirectoryName(basePath); + return dir != null ? Path.Combine(dir, relPath) : relPath; + } + + private static int ReadWord(ReadCharFunc readChar, ref T userData, StringBuilder buffer) + { + while (true) + { + int c = readChar(ref userData); + if (char.IsLetterOrDigit((char)c) || c == '_') + { + buffer.Append((char)c); + } + else + { + return c; + } + } + } } \ No newline at end of file