From d5855b440d680c9be534da17a52bc08910c9bea4 Mon Sep 17 00:00:00 2001 From: mysticmnd Date: Tue, 31 Mar 2026 11:17:33 +0530 Subject: [PATCH] feat(markdown): add Telegram MarkdownV2 support and fallbacks Improve cross-platform markdown compatibility by adding Telegram-specific escaping and graceful fallbacks for unsupported tags while refining CommonMark text handling; includes docs/tests updates and bumps package version to 5.3.0. --- README.md | 27 ++++++- dotnet | 0 src/ReverseMarkdown.Test/ConverterTests.cs | 78 +++++++++++++++++++ src/ReverseMarkdown/Config.cs | 5 ++ src/ReverseMarkdown/Converters/A.cs | 49 +++++++++++- src/ReverseMarkdown/Converters/Br.cs | 3 + src/ReverseMarkdown/Converters/Code.cs | 10 ++- src/ReverseMarkdown/Converters/Em.cs | 1 + src/ReverseMarkdown/Converters/H.cs | 12 +++ src/ReverseMarkdown/Converters/Hr.cs | 7 ++ src/ReverseMarkdown/Converters/Img.cs | 29 +++++++ src/ReverseMarkdown/Converters/Li.cs | 11 ++- src/ReverseMarkdown/Converters/Pre.cs | 8 +- src/ReverseMarkdown/Converters/S.cs | 4 +- src/ReverseMarkdown/Converters/Strong.cs | 1 + src/ReverseMarkdown/Converters/Sup.cs | 10 +++ src/ReverseMarkdown/Converters/Table.cs | 61 +++++++++++++++ src/ReverseMarkdown/Converters/Td.cs | 4 + src/ReverseMarkdown/Converters/Text.cs | 7 +- src/ReverseMarkdown/Converters/Tr.cs | 4 + src/ReverseMarkdown/Helpers/StringUtils.cs | 38 +++++++++ src/ReverseMarkdown/ReverseMarkdown.csproj | 2 +- .../UnsupportedTagExtension.cs | 7 ++ 23 files changed, 367 insertions(+), 11 deletions(-) create mode 100644 dotnet diff --git a/README.md b/README.md index 4550928..2a45739 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,7 @@ If you have used and benefitted from this library. Please feel free to sponsor m **Markdown flavors** - GitHub Flavoured Markdown conversion for br, pre, tasklists, and table. Use `var config = new ReverseMarkdown.Config(githubFlavoured:true);`. 
By default the table will always be converted to Github flavored markdown immaterial of this flag - Slack Flavoured Markdown conversion. Use `var config = new ReverseMarkdown.Config { SlackFlavored = true };` +- Telegram MarkdownV2 conversion. Use `var config = new ReverseMarkdown.Config { TelegramMarkdownV2 = true };` - CommonMark-focused output with opt-in flags to preserve compatibility. Use `var config = new ReverseMarkdown.Config { CommonMark = true };` This mode may emit inline HTML for tricky emphasis/link cases unless you disable `CommonMarkUseHtmlInlineTags`. **Tables** @@ -101,18 +102,42 @@ If you need to preserve markdown-like text as literal content (for example `# He ```cs var config = new ReverseMarkdown.Config { - EscapeMarkdownLineStarts = true + EscapeMarkdownLineStarts = true, // or CommonMark = true }; var converter = new ReverseMarkdown.Converter(config); ``` +### Telegram MarkdownV2 mode + +When `TelegramMarkdownV2` is enabled, ReverseMarkdown applies Telegram-compatible formatting and escaping rules: + +```cs +var converter = new ReverseMarkdown.Converter(new ReverseMarkdown.Config +{ + TelegramMarkdownV2 = true +}); + +var html = "This is <strong>bold</strong>, <em>italic</em>, <s>strikethrough</s> and <a href='https://example.com/path_(one)?q=1)2'>a_b[c]</a>"; +var result = converter.Convert(html); +// This is *bold*, _italic_, ~strikethrough~ and [a\_b\[c\]](https://example.com/path_(one\)?q=1\)2) +``` + +Notes: + +- Text and link labels escape Telegram-reserved characters. +- Ordered and unordered list markers are escaped (`1\.` and `\-`). +- `<img>` falls back to a link label (for example `[Image: alt](url)`). +- `<table>` falls back to a preformatted code block representation. +- `<sup>` falls back to caret notation (for example `x^2`). + ## Configuration options * `DefaultCodeBlockLanguage` - Option to set the default code block language for Github style markdown if class based language markers are not available * `GithubFlavored` - Github style markdown for br, pre and table. 
Default is false * `SlackFlavored` - Slack style markdown formatting. When enabled, uses `*` for bold, `_` for italic, `~` for strikethrough, and `•` for list bullets. Default is false +* `TelegramMarkdownV2` - Telegram MarkdownV2 formatting and escaping rules. When enabled, output escapes Telegram-reserved characters and uses Telegram-compatible emphasis and link syntax. For unsupported Telegram constructs, ReverseMarkdown falls back to readable text (`<img>` to link label, `<table>` to preformatted block, `<sup>` to caret notation). * `CommonMark` - Enable CommonMark-focused output rules. Default is false * `CommonMarkUseHtmlInlineTags` - When CommonMark is enabled, emit HTML for inline tags (`em`, `strong`, `a`, `img`) to avoid delimiter edge cases. Default is true * `CommonMarkIntrawordEmphasisSpacing` - When CommonMark is enabled, insert spaces to avoid intraword emphasis. Default is false diff --git a/dotnet b/dotnet new file mode 100644 index 0000000..e69de29 diff --git a/src/ReverseMarkdown.Test/ConverterTests.cs b/src/ReverseMarkdown.Test/ConverterTests.cs index 9676e1e..d3ae708 100644 --- a/src/ReverseMarkdown.Test/ConverterTests.cs +++ b/src/ReverseMarkdown.Test/ConverterTests.cs @@ -647,6 +647,82 @@ public void SlackFlavored_Unsupported_Hr() Assert.Throws(() => converter.Convert(html)); } + [Fact] + public void TelegramMarkdownV2_BasicFormatting() + { + var html = "This is bold, italic, strikethrough and a link"; + var converter = new Converter(new Config { TelegramMarkdownV2 = true }); + + var result = converter.Convert(html); + + Assert.Equal("This is *bold*, _italic_, ~strikethrough~ and [a link](https://example.com)", result); + } + + [Fact] + public void TelegramMarkdownV2_EscapeSpecialCharactersInText() + { + var html = "

Special _ * [ ] ( ) ~ ` > # + - = | { } . ! \\

"; + var converter = new Converter(new Config { TelegramMarkdownV2 = true }); + + var result = converter.Convert(html); + + Assert.Equal("Special \\_ \\* \\[ \\] \\( \\) \\~ \\` \\> \\# \\+ \\- \\= \\| \\{ \\} \\. \\! \\\\", result); + } + + [Fact] + public void TelegramMarkdownV2_EscapeLinkTextAndHref() + { + var html = "a_b[c]"; + var converter = new Converter(new Config { TelegramMarkdownV2 = true }); + + var result = converter.Convert(html); + + Assert.Equal("[a\\_b\\[c\\]](https://example.com/path_(one\\)?q=1\\)2)", result); + } + + [Fact] + public void TelegramMarkdownV2_EscapesListMarkers() + { + var html = "
  • Item 1
  1. Item 2
"; + var converter = new Converter(new Config { TelegramMarkdownV2 = true }); + + var result = converter.Convert(html); + + Assert.Contains("\\- Item 1", result); + Assert.Contains("1\\. Item 2", result); + } + + [Fact] + public void TelegramMarkdownV2_Img_FallsBackToLink() + { + var converter = new Converter(new Config { TelegramMarkdownV2 = true }); + + var result = converter.Convert(""); + + Assert.Equal("[Image](https://example.com/test.png)", result); + } + + [Fact] + public void TelegramMarkdownV2_Sup_FallsBackToCaretNotation() + { + var converter = new Converter(new Config { TelegramMarkdownV2 = true }); + + var result = converter.Convert("x2"); + + Assert.Equal("x^2", result); + } + + [Fact] + public void TelegramMarkdownV2_Table_FallsBackToCodeBlock() + { + var converter = new Converter(new Config { TelegramMarkdownV2 = true }); + + var result = converter.Convert("
value
"); + + Assert.Contains("```", result); + Assert.Contains("value", result); + } + [Fact] public void SlackFlavored_Unsupported_Img() { @@ -808,6 +884,7 @@ private static Config BuildConfig(CaseData testCase) ApplyBool(overrides.GithubFlavored, value => config.GithubFlavored = value); ApplyBool(overrides.SlackFlavored, value => config.SlackFlavored = value); + ApplyBool(overrides.TelegramMarkdownV2, value => config.TelegramMarkdownV2 = value); ApplyBool(overrides.CommonMark, value => config.CommonMark = value); ApplyBool(overrides.CommonMarkIntrawordEmphasisSpacing, value => config.CommonMarkIntrawordEmphasisSpacing = value); @@ -924,6 +1001,7 @@ public class CaseConfig { public bool? GithubFlavored { get; set; } public bool? SlackFlavored { get; set; } + public bool? TelegramMarkdownV2 { get; set; } public bool? CommonMark { get; set; } public bool? CommonMarkIntrawordEmphasisSpacing { get; set; } public bool? CommonMarkUseHtmlInlineTags { get; set; } diff --git a/src/ReverseMarkdown/Config.cs b/src/ReverseMarkdown/Config.cs index 5e6e229..329b13d 100644 --- a/src/ReverseMarkdown/Config.cs +++ b/src/ReverseMarkdown/Config.cs @@ -11,6 +11,11 @@ public class Config public bool SlackFlavored { get; set; } = false; + /// + /// Telegram MarkdownV2 conversion. + /// + public bool TelegramMarkdownV2 { get; set; } = false; + /// /// Enable CommonMark compatible emphasis handling (avoid intraword emphasis by inserting spaces). 
/// diff --git a/src/ReverseMarkdown/Converters/A.cs b/src/ReverseMarkdown/Converters/A.cs index 7b43617..29f1c0b 100644 --- a/src/ReverseMarkdown/Converters/A.cs +++ b/src/ReverseMarkdown/Converters/A.cs @@ -21,14 +21,20 @@ public A(Converter converter) : base(converter) public override void Convert(TextWriter writer, HtmlNode node) { var isCommonMark = Converter.Config.CommonMark; + var isTelegram = Converter.Config.TelegramMarkdownV2; var name = TreatChildrenAsString(node); - if (!isCommonMark) { + if (!isCommonMark && !isTelegram) { name = name.Trim(); } - else { + else if (isCommonMark) { name = name.ReplaceLineEndings(" "); } + if (isTelegram) { + ConvertTelegramMarkdownV2(writer, node, name); + return; + } + if (isCommonMark && node.FirstChild?.NodeType == HtmlNodeType.Text && (node.InnerText.Contains("\\", StringComparison.Ordinal) || @@ -200,6 +206,45 @@ public override void Convert(TextWriter writer, HtmlNode node) } } + private void ConvertTelegramMarkdownV2(TextWriter writer, HtmlNode node, string name) + { + var href = node.GetAttributeValue("href", string.Empty).Trim(); + var hasHrefAttribute = node.Attributes["href"] != null; + var escapedName = StringUtils.EscapeTelegramMarkdownV2(name); + + if (!hasHrefAttribute) { + writer.Write(escapedName); + return; + } + + var scheme = StringUtils.GetScheme(href); + + var isRemoveLinkWhenSameName = ( + Converter.Config.SmartHrefHandling && + scheme != string.Empty && + Uri.IsWellFormedUriString(href, UriKind.RelativeOrAbsolute) && ( + href.Equals(name, StringComparison.OrdinalIgnoreCase) || + href.Equals($"tel:{name}", StringComparison.OrdinalIgnoreCase) || + href.Equals($"mailto:{name}", StringComparison.OrdinalIgnoreCase) + ) + ); + + if (href.StartsWith("#", StringComparison.Ordinal) + || !Converter.Config.IsSchemeWhitelisted(scheme) + || isRemoveLinkWhenSameName + || string.IsNullOrEmpty(href)) { + writer.Write(escapedName); + return; + } + + var escapedHref = 
StringUtils.EscapeTelegramMarkdownV2LinkUrl(href); + writer.Write('['); + writer.Write(escapedName); + writer.Write("]("); + writer.Write(escapedHref); + writer.Write(')'); + } + private static void WriteRawHtmlAnchor(TextWriter writer, HtmlNode node, string text) { writer.Write(" 1, 'h2' -> 2, etc. var content = TreatChildrenAsString(node); + if (Converter.Config.TelegramMarkdownV2) { + writer.WriteLine(); + for (var i = 0; i < level; i++) { + writer.Write("\\#"); + } + + writer.Write(' '); + writer.Write(content); + writer.WriteLine(); + return; + } + if (Converter.Config.CommonMark) { content = content.ReplaceLineEndings(" "); content = EscapeTrailingHashes(content); diff --git a/src/ReverseMarkdown/Converters/Hr.cs b/src/ReverseMarkdown/Converters/Hr.cs index bf7d7c3..4a4d44c 100644 --- a/src/ReverseMarkdown/Converters/Hr.cs +++ b/src/ReverseMarkdown/Converters/Hr.cs @@ -15,6 +15,13 @@ public override void Convert(TextWriter writer, HtmlNode node) throw new SlackUnsupportedTagException(node.Name); } + if (Converter.Config.TelegramMarkdownV2) { + writer.WriteLine(); + writer.Write("\\-\\-\\-"); + writer.WriteLine(); + return; + } + writer.WriteLine(); writer.Write("* * *"); writer.WriteLine(); diff --git a/src/ReverseMarkdown/Converters/Img.cs b/src/ReverseMarkdown/Converters/Img.cs index 9269b76..0592bed 100644 --- a/src/ReverseMarkdown/Converters/Img.cs +++ b/src/ReverseMarkdown/Converters/Img.cs @@ -59,6 +59,11 @@ public override void Convert(TextWriter writer, HtmlNode node) } } + if (Converter.Config.TelegramMarkdownV2) { + WriteTelegramFallback(writer, alt, src, isBase64Image); + return; + } + if (Converter.Config.CommonMark && altAttribute == null) { writer.Write(node.OuterHtml); return; @@ -78,6 +83,30 @@ public override void Convert(TextWriter writer, HtmlNode node) writer.Write(')'); } + private void WriteTelegramFallback(TextWriter writer, string alt, string src, bool isBase64Image) + { + var label = string.IsNullOrWhiteSpace(alt) + ? 
"Image" + : $"Image: {alt}"; + + var escapedLabel = StringUtils.EscapeTelegramMarkdownV2(label); + var canRenderAsLink = !string.IsNullOrEmpty(src) && ( + !isBase64Image || + Converter.Config.Base64Images == Config.Base64ImageHandling.SaveToFile + ); + + if (!canRenderAsLink) { + writer.Write(escapedLabel); + return; + } + + writer.Write('['); + writer.Write(escapedLabel); + writer.Write("]("); + writer.Write(StringUtils.EscapeTelegramMarkdownV2LinkUrl(src)); + writer.Write(')'); + } + private string SaveBase64ImageToFile(string base64Src) { try diff --git a/src/ReverseMarkdown/Converters/Li.cs b/src/ReverseMarkdown/Converters/Li.cs index 47b44db..bd29226 100644 --- a/src/ReverseMarkdown/Converters/Li.cs +++ b/src/ReverseMarkdown/Converters/Li.cs @@ -2,6 +2,7 @@ using System.IO; using System.Linq; using HtmlAgilityPack; +using ReverseMarkdown.Helpers; namespace ReverseMarkdown.Converters { @@ -30,10 +31,16 @@ public override void Convert(TextWriter writer, HtmlNode node) var start = node.ParentNode.GetAttributeValue("start", 1); var index = node.ParentNode.SelectNodes("./li").IndexOf(node) + start; writer.Write(index); - writer.Write(". "); + writer.Write(Converter.Config.TelegramMarkdownV2 ? "\\. " : ". 
"); } else { - writer.Write(Converter.Config.ListBulletChar); + if (Converter.Config.TelegramMarkdownV2) { + writer.Write(StringUtils.EscapeTelegramMarkdownV2(Converter.Config.ListBulletChar.ToString())); + } + else { + writer.Write(Converter.Config.ListBulletChar); + } + writer.Write(' '); } diff --git a/src/ReverseMarkdown/Converters/Pre.cs b/src/ReverseMarkdown/Converters/Pre.cs index 53a1d19..98be495 100644 --- a/src/ReverseMarkdown/Converters/Pre.cs +++ b/src/ReverseMarkdown/Converters/Pre.cs @@ -19,9 +19,10 @@ public override void Convert(TextWriter writer, HtmlNode node) return; } - var isFencedCodeBlock = Converter.Config.GithubFlavored || Converter.Config.CommonMark; + var isTelegram = Converter.Config.TelegramMarkdownV2; + var isFencedCodeBlock = Converter.Config.GithubFlavored || Converter.Config.CommonMark || isTelegram; - var indentation = Converter.Config.CommonMark + var indentation = (Converter.Config.CommonMark || isTelegram) ? string.Empty : IndentationFor(node); var contentIndentation = indentation; @@ -37,6 +38,9 @@ public override void Convert(TextWriter writer, HtmlNode node) // content: var content = DecodeHtml(node.InnerText); + if (isTelegram) { + content = StringUtils.EscapeTelegramMarkdownV2Code(content); + } if (isFencedCodeBlock) { var fence = Converter.Config.CommonMark diff --git a/src/ReverseMarkdown/Converters/S.cs b/src/ReverseMarkdown/Converters/S.cs index e410301..931c3ac 100644 --- a/src/ReverseMarkdown/Converters/S.cs +++ b/src/ReverseMarkdown/Converters/S.cs @@ -25,7 +25,9 @@ public override void Convert(TextWriter writer, HtmlNode node) return; } - var emphasis = Converter.Config.SlackFlavored ? "~" : "~~"; + var emphasis = Converter.Config.SlackFlavored || Converter.Config.TelegramMarkdownV2 + ? 
"~" + : "~~"; TreatEmphasizeContentWhitespaceGuard(writer, content, emphasis); } diff --git a/src/ReverseMarkdown/Converters/Strong.cs b/src/ReverseMarkdown/Converters/Strong.cs index 7a87763..64f6ca0 100644 --- a/src/ReverseMarkdown/Converters/Strong.cs +++ b/src/ReverseMarkdown/Converters/Strong.cs @@ -50,6 +50,7 @@ public override void Convert(TextWriter writer, HtmlNode node) : ""; var emphasis = Converter.Config.SlackFlavored + || Converter.Config.TelegramMarkdownV2 ? "*" : isCommonMark && Context.AncestorsAny("strong") ? "__" diff --git a/src/ReverseMarkdown/Converters/Sup.cs b/src/ReverseMarkdown/Converters/Sup.cs index 41f2813..ea469ff 100644 --- a/src/ReverseMarkdown/Converters/Sup.cs +++ b/src/ReverseMarkdown/Converters/Sup.cs @@ -18,6 +18,16 @@ public override void Convert(TextWriter writer, HtmlNode node) var content = TreatChildrenAsString(node); + if (Converter.Config.TelegramMarkdownV2) { + if (string.IsNullOrEmpty(content)) { + return; + } + + writer.Write('^'); + writer.Write(content.Chomp()); + return; + } + if (string.IsNullOrEmpty(content) || AlreadySup()) { writer.Write(content); return; diff --git a/src/ReverseMarkdown/Converters/Table.cs b/src/ReverseMarkdown/Converters/Table.cs index f382873..31259a1 100644 --- a/src/ReverseMarkdown/Converters/Table.cs +++ b/src/ReverseMarkdown/Converters/Table.cs @@ -24,6 +24,11 @@ public override void Convert(TextWriter writer, HtmlNode node) throw new SlackUnsupportedTagException(node.Name); } + if (Converter.Config.TelegramMarkdownV2) { + WriteTelegramFallback(writer, node); + return; + } + // Tables inside tables are not supported as markdown, so leave as HTML if (Context.AncestorsAny("table")) { // Compact the nested table HTML to prevent breaking the markdown table @@ -60,6 +65,62 @@ public override void Convert(TextWriter writer, HtmlNode node) writer.WriteLine(); } + private static void WriteTelegramFallback(TextWriter writer, HtmlNode node) + { + var captionText = 
node.SelectSingleNode("caption")?.InnerText?.Trim(); + if (!string.IsNullOrEmpty(captionText)) { + writer.WriteLine(); + writer.WriteLine(StringUtils.EscapeTelegramMarkdownV2(captionText)); + } + + var rows = node.SelectNodes(".//tr"); + if (rows == null || rows.Count == 0) { + var plainText = HtmlEntity.DeEntitize(node.InnerText).Trim(); + if (!string.IsNullOrEmpty(plainText)) { + writer.Write(StringUtils.EscapeTelegramMarkdownV2(plainText)); + } + + return; + } + + var renderedRows = new List(rows.Count); + foreach (var row in rows) { + var cells = row.SelectNodes("./th|./td"); + if (cells == null || cells.Count == 0) { + var rowText = NormalizeWhitespace(row.InnerText); + if (!string.IsNullOrEmpty(rowText)) { + renderedRows.Add(rowText); + } + + continue; + } + + var cellTexts = cells + .Select(cell => NormalizeWhitespace(cell.InnerText)) + .ToArray(); + renderedRows.Add(string.Join(" | ", cellTexts)); + } + + if (renderedRows.Count == 0) { + return; + } + + writer.WriteLine(); + writer.WriteLine("```"); + foreach (var row in renderedRows) { + writer.WriteLine(StringUtils.EscapeTelegramMarkdownV2Code(row)); + } + + writer.Write("```"); + writer.WriteLine(); + } + + private static string NormalizeWhitespace(string value) + { + var decoded = HtmlEntity.DeEntitize(value); + return string.Join(" ", decoded.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries)); + } + private static bool HasNoTableHeaderRow(HtmlNode node) { var thNode = node.SelectNodes("//th")?.FirstOrDefault(); diff --git a/src/ReverseMarkdown/Converters/Td.cs b/src/ReverseMarkdown/Converters/Td.cs index c82cfb0..e05025c 100644 --- a/src/ReverseMarkdown/Converters/Td.cs +++ b/src/ReverseMarkdown/Converters/Td.cs @@ -16,6 +16,10 @@ public override void Convert(TextWriter writer, HtmlNode node) throw new SlackUnsupportedTagException(node.Name); } + if (Converter.Config.TelegramMarkdownV2) { + throw new TelegramUnsupportedTagException(node.Name); + } + var colSpan = GetColSpan(node); var content 
= TreatChildrenAsString(node) diff --git a/src/ReverseMarkdown/Converters/Text.cs b/src/ReverseMarkdown/Converters/Text.cs index 5c2129c..f16c590 100644 --- a/src/ReverseMarkdown/Converters/Text.cs +++ b/src/ReverseMarkdown/Converters/Text.cs @@ -88,6 +88,7 @@ public override void Convert(TextWriter writer, HtmlNode node) private void TreatText(TextWriter writer, HtmlNode node) { var isCommonMark = Converter.Config.CommonMark; + var isTelegram = Converter.Config.TelegramMarkdownV2; var rawText = isCommonMark ? node.OuterHtml : node.InnerText; @@ -152,6 +153,10 @@ parent.Name is "p" or "#document" && } } + if (isTelegram && parent.Name != "a") { + content = StringUtils.EscapeTelegramMarkdownV2(content); + } + if (shouldTrim) { content = content.Trim(); } @@ -168,7 +173,7 @@ parent.Name is "p" or "#document" && content = content.TrimStart('\r', '\n'); } - if (parent.Name != "a" && !Converter.Config.SlackFlavored) { + if (!isTelegram && parent.Name != "a" && !Converter.Config.SlackFlavored) { content = content.Replace(_escapedKeyChars); // Preserve Key Chars Within BackTicks: content = BackTicks().Replace(content, p => p.Value.Replace(_escapedKeyCharsReverse)); diff --git a/src/ReverseMarkdown/Converters/Tr.cs b/src/ReverseMarkdown/Converters/Tr.cs index d53d29e..676b420 100644 --- a/src/ReverseMarkdown/Converters/Tr.cs +++ b/src/ReverseMarkdown/Converters/Tr.cs @@ -17,6 +17,10 @@ public override void Convert(TextWriter writer, HtmlNode node) throw new SlackUnsupportedTagException(node.Name); } + if (Converter.Config.TelegramMarkdownV2) { + throw new TelegramUnsupportedTagException(node.Name); + } + var content = TreatChildrenAsString(node).TrimEnd(); if (string.IsNullOrWhiteSpace(content)) { diff --git a/src/ReverseMarkdown/Helpers/StringUtils.cs b/src/ReverseMarkdown/Helpers/StringUtils.cs index 8927e4b..2b14bde 100644 --- a/src/ReverseMarkdown/Helpers/StringUtils.cs +++ b/src/ReverseMarkdown/Helpers/StringUtils.cs @@ -1,12 +1,17 @@ using System; using 
System.Collections.Generic; using System.Linq; +using System.Text; using System.Text.RegularExpressions; namespace ReverseMarkdown.Helpers; public static partial class StringUtils { + private static readonly HashSet TelegramMarkdownV2EscapableChars = new() { + '_', '*', '[', ']', '(', ')', '~', '`', '>', '#', '+', '-', '=', '|', '{', '}', '.', '!', '\\' + }; + /// /// Gets scheme for provided uri string to overcome different behavior between windows/linux. https://github.com/dotnet/corefx/issues/1745 /// Assume http for url starting with // @@ -68,4 +73,37 @@ public static Dictionary ParseStyle(string? style) .DistinctBy(styleParts => styleParts[0], StringComparer.OrdinalIgnoreCase) .ToDictionary(styleParts => styleParts[0], styleParts => styleParts[1], StringComparer.OrdinalIgnoreCase); } + + public static string EscapeTelegramMarkdownV2(string content) + { + return EscapeChars(content, c => TelegramMarkdownV2EscapableChars.Contains(c)); + } + + public static string EscapeTelegramMarkdownV2Code(string content) + { + return EscapeChars(content, c => c is '`' or '\\'); + } + + public static string EscapeTelegramMarkdownV2LinkUrl(string url) + { + return EscapeChars(url, c => c is ')' or '\\'); + } + + private static string EscapeChars(string content, Func mustEscape) + { + if (string.IsNullOrEmpty(content)) { + return content; + } + + var builder = new StringBuilder(content.Length + 16); + foreach (var c in content) { + if (mustEscape(c)) { + builder.Append('\\'); + } + + builder.Append(c); + } + + return builder.ToString(); + } } diff --git a/src/ReverseMarkdown/ReverseMarkdown.csproj b/src/ReverseMarkdown/ReverseMarkdown.csproj index 1d5f580..f5e8625 100644 --- a/src/ReverseMarkdown/ReverseMarkdown.csproj +++ b/src/ReverseMarkdown/ReverseMarkdown.csproj @@ -1,7 +1,7 @@  ReverseMarkdown is a Html to Markdown converter library in c# - 5.2.1 + 5.3.0 Babu Annamalai net8.0;net9.0;net10.0 preview diff --git a/src/ReverseMarkdown/UnsupportedTagExtension.cs 
b/src/ReverseMarkdown/UnsupportedTagExtension.cs index e975122..aeb177d 100644 --- a/src/ReverseMarkdown/UnsupportedTagExtension.cs +++ b/src/ReverseMarkdown/UnsupportedTagExtension.cs @@ -15,3 +15,10 @@ internal SlackUnsupportedTagException(string tagName) { } } + +public class TelegramUnsupportedTagException : UnsupportedTagException { + internal TelegramUnsupportedTagException(string tagName) + : base($"<{tagName}> tags cannot be converted to Telegram MarkdownV2") + { + } +}