Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ If you have used and benefitted from this library. Please feel free to sponsor m
**Markdown flavors**
- GitHub Flavoured Markdown conversion for br, pre, tasklists, and table. Use `var config = new ReverseMarkdown.Config(githubFlavoured:true);`. By default, tables are always converted to GitHub Flavoured Markdown regardless of this flag.
- Slack Flavoured Markdown conversion. Use `var config = new ReverseMarkdown.Config { SlackFlavored = true };`
- Telegram MarkdownV2 conversion. Use `var config = new ReverseMarkdown.Config { TelegramMarkdownV2 = true };`
- CommonMark-focused output with opt-in flags to preserve compatibility. Use `var config = new ReverseMarkdown.Config { CommonMark = true };` This mode may emit inline HTML for tricky emphasis/link cases unless you disable `CommonMarkUseHtmlInlineTags`.

**Tables**
Expand Down Expand Up @@ -101,18 +102,42 @@ If you need to preserve markdown-like text as literal content (for example `# He
```cs
var config = new ReverseMarkdown.Config
{
EscapeMarkdownLineStarts = true
EscapeMarkdownLineStarts = true,
// or CommonMark = true
};

var converter = new ReverseMarkdown.Converter(config);
```

### Telegram MarkdownV2 mode

When `TelegramMarkdownV2` is enabled, ReverseMarkdown applies Telegram-compatible formatting and escaping rules:

```cs
var converter = new ReverseMarkdown.Converter(new ReverseMarkdown.Config
{
TelegramMarkdownV2 = true
});

var html = "This is <strong>bold</strong>, <em>italic</em>, <del>strikethrough</del> and <a href=\"https://example.com/path_(one)?q=1)2\">a_b[c]</a>";
var result = converter.Convert(html);
// This is *bold*, _italic_, ~strikethrough~ and [a\_b\[c\]](https://example.com/path_(one\)?q=1\)2)
```

Notes:

- Text and link labels escape Telegram-reserved characters.
- Ordered and unordered list markers are escaped (`1\.` and `\-`).
- `<img>` falls back to a link label (for example `[Image: alt](url)`).
- `<table>` falls back to a preformatted code block representation.
- `<sup>` falls back to caret notation (for example `x^2`).

## Configuration options

* `DefaultCodeBlockLanguage` - Option to set the default code block language for Github style markdown if class based language markers are not available
* `GithubFlavored` - Github style markdown for br, pre and table. Default is false
* `SlackFlavored` - Slack style markdown formatting. When enabled, uses `*` for bold, `_` for italic, `~` for strikethrough, and `•` for list bullets. Default is false
* `TelegramMarkdownV2` - Telegram MarkdownV2 formatting and escaping rules. When enabled, output escapes Telegram-reserved characters and uses Telegram-compatible emphasis and link syntax. For unsupported Telegram constructs, ReverseMarkdown falls back to readable text (`<img>` to link label, `<table>` to preformatted block, `<sup>` to caret notation). Default is false
* `CommonMark` - Enable CommonMark-focused output rules. Default is false
* `CommonMarkUseHtmlInlineTags` - When CommonMark is enabled, emit HTML for inline tags (`em`, `strong`, `a`, `img`) to avoid delimiter edge cases. Default is true
* `CommonMarkIntrawordEmphasisSpacing` - When CommonMark is enabled, insert spaces to avoid intraword emphasis. Default is false
Expand Down
Empty file added dotnet
Empty file.
78 changes: 78 additions & 0 deletions src/ReverseMarkdown.Test/ConverterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,82 @@ public void SlackFlavored_Unsupported_Hr()
Assert.Throws<SlackUnsupportedTagException>(() => converter.Convert(html));
}

[Fact]
public void TelegramMarkdownV2_BasicFormatting()
{
    // Bold, italic, strikethrough and a plain link in a single sentence.
    const string input = "This is <strong>bold</strong>, <em>italic</em>, <del>strikethrough</del> and <a href=\"https://example.com\">a link</a>";
    const string expected = "This is *bold*, _italic_, ~strikethrough~ and [a link](https://example.com)";
    var telegramConfig = new Config { TelegramMarkdownV2 = true };

    var markdown = new Converter(telegramConfig).Convert(input);

    Assert.Equal(expected, markdown);
}

[Fact]
public void TelegramMarkdownV2_EscapeSpecialCharactersInText()
{
    // Every reserved character in plain paragraph text must come back backslash-escaped.
    const string input = "<p>Special _ * [ ] ( ) ~ ` > # + - = | { } . ! \\</p>";
    const string expected = "Special \\_ \\* \\[ \\] \\( \\) \\~ \\` \\> \\# \\+ \\- \\= \\| \\{ \\} \\. \\! \\\\";
    var telegramConfig = new Config { TelegramMarkdownV2 = true };

    var markdown = new Converter(telegramConfig).Convert(input);

    Assert.Equal(expected, markdown);
}

[Fact]
public void TelegramMarkdownV2_EscapeLinkTextAndHref()
{
    // The link label and the link target are escaped with different rules;
    // the expected string pins both.
    const string input = "<a href=\"https://example.com/path_(one)?q=1)2\">a_b[c]</a>";
    const string expected = "[a\\_b\\[c\\]](https://example.com/path_(one\\)?q=1\\)2)";
    var telegramConfig = new Config { TelegramMarkdownV2 = true };

    var markdown = new Converter(telegramConfig).Convert(input);

    Assert.Equal(expected, markdown);
}

[Fact]
public void TelegramMarkdownV2_EscapesListMarkers()
{
    // One unordered and one ordered list; only the escaped markers are checked,
    // not the surrounding whitespace.
    const string input = "<ul><li>Item 1</li></ul><ol><li>Item 2</li></ol>";
    var telegramConfig = new Config { TelegramMarkdownV2 = true };

    var markdown = new Converter(telegramConfig).Convert(input);

    Assert.Contains("\\- Item 1", markdown);
    Assert.Contains("1\\. Item 2", markdown);
}

[Fact]
public void TelegramMarkdownV2_Img_FallsBackToLink()
{
    // An image without alt text is expected to become a link labelled "Image".
    const string input = "<img src=\"https://example.com/test.png\" />";
    const string expected = "[Image](https://example.com/test.png)";
    var telegramConfig = new Config { TelegramMarkdownV2 = true };

    var markdown = new Converter(telegramConfig).Convert(input);

    Assert.Equal(expected, markdown);
}

[Fact]
public void TelegramMarkdownV2_Sup_FallsBackToCaretNotation()
{
    // Superscript is expected to be rendered with a caret.
    const string input = "x<sup>2</sup>";
    const string expected = "x^2";
    var telegramConfig = new Config { TelegramMarkdownV2 = true };

    var markdown = new Converter(telegramConfig).Convert(input);

    Assert.Equal(expected, markdown);
}

[Fact]
public void TelegramMarkdownV2_Table_FallsBackToCodeBlock()
{
    // Only the presence of a code fence and of the cell text is asserted,
    // not the exact layout of the block.
    const string input = "<table><tr><td>value</td></tr></table>";
    var telegramConfig = new Config { TelegramMarkdownV2 = true };

    var markdown = new Converter(telegramConfig).Convert(input);

    Assert.Contains("```", markdown);
    Assert.Contains("value", markdown);
}

[Fact]
public void SlackFlavored_Unsupported_Img()
{
Expand Down Expand Up @@ -808,6 +884,7 @@ private static Config BuildConfig(CaseData testCase)

ApplyBool(overrides.GithubFlavored, value => config.GithubFlavored = value);
ApplyBool(overrides.SlackFlavored, value => config.SlackFlavored = value);
ApplyBool(overrides.TelegramMarkdownV2, value => config.TelegramMarkdownV2 = value);
ApplyBool(overrides.CommonMark, value => config.CommonMark = value);
ApplyBool(overrides.CommonMarkIntrawordEmphasisSpacing,
value => config.CommonMarkIntrawordEmphasisSpacing = value);
Expand Down Expand Up @@ -924,6 +1001,7 @@ public class CaseConfig
{
public bool? GithubFlavored { get; set; }
public bool? SlackFlavored { get; set; }
public bool? TelegramMarkdownV2 { get; set; }
public bool? CommonMark { get; set; }
public bool? CommonMarkIntrawordEmphasisSpacing { get; set; }
public bool? CommonMarkUseHtmlInlineTags { get; set; }
Expand Down
5 changes: 5 additions & 0 deletions src/ReverseMarkdown/Config.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ public class Config

public bool SlackFlavored { get; set; } = false;

/// <summary>
/// Enables Telegram MarkdownV2 conversion: converters that check this flag switch to
/// Telegram-specific formatting and escaping. Defaults to <c>false</c>.
/// </summary>
public bool TelegramMarkdownV2 { get; set; } = false;

/// <summary>
/// Enable CommonMark compatible emphasis handling (avoid intraword emphasis by inserting spaces).
/// </summary>
Expand Down
49 changes: 47 additions & 2 deletions src/ReverseMarkdown/Converters/A.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,20 @@ public A(Converter converter) : base(converter)
public override void Convert(TextWriter writer, HtmlNode node)
{
var isCommonMark = Converter.Config.CommonMark;
var isTelegram = Converter.Config.TelegramMarkdownV2;
var name = TreatChildrenAsString(node);
if (!isCommonMark) {
if (!isCommonMark && !isTelegram) {
name = name.Trim();
}
else {
else if (isCommonMark) {
name = name.ReplaceLineEndings("&#10;");
}

if (isTelegram) {
ConvertTelegramMarkdownV2(writer, node, name);
return;
}

if (isCommonMark &&
node.FirstChild?.NodeType == HtmlNodeType.Text &&
(node.InnerText.Contains("\\", StringComparison.Ordinal) ||
Expand Down Expand Up @@ -200,6 +206,45 @@ public override void Convert(TextWriter writer, HtmlNode node)
}
}

/// <summary>
/// Writes an anchor for Telegram MarkdownV2 output: either <c>[label](url)</c> with the
/// label and URL escaped, or only the escaped label when the link is dropped.
/// </summary>
/// <param name="writer">Destination for the generated markdown.</param>
/// <param name="node">Anchor node whose <c>href</c> attribute is read.</param>
/// <param name="name">Link text as passed in by <c>Convert</c>.</param>
private void ConvertTelegramMarkdownV2(TextWriter writer, HtmlNode node, string name)
{
    var label = StringUtils.EscapeTelegramMarkdownV2(name);

    // An anchor with no href attribute at all is emitted as plain (escaped) text.
    if (node.Attributes["href"] == null) {
        writer.Write(label);
        return;
    }

    var target = node.GetAttributeValue("href", string.Empty).Trim();
    var scheme = StringUtils.GetScheme(target);

    // With SmartHrefHandling on, a well-formed link whose text repeats its own target
    // (optionally without a tel:/mailto: prefix) is reduced to the text alone.
    var textRepeatsTarget = false;
    if (Converter.Config.SmartHrefHandling
        && scheme != string.Empty
        && Uri.IsWellFormedUriString(target, UriKind.RelativeOrAbsolute)) {
        textRepeatsTarget =
            target.Equals(name, StringComparison.OrdinalIgnoreCase)
            || target.Equals($"tel:{name}", StringComparison.OrdinalIgnoreCase)
            || target.Equals($"mailto:{name}", StringComparison.OrdinalIgnoreCase);
    }

    // Fragment-only links, non-whitelisted schemes, self-describing links and
    // empty targets all degrade to the label.
    var dropLink = target.StartsWith("#", StringComparison.Ordinal)
                   || !Converter.Config.IsSchemeWhitelisted(scheme)
                   || textRepeatsTarget
                   || string.IsNullOrEmpty(target);
    if (dropLink) {
        writer.Write(label);
        return;
    }

    var escapedTarget = StringUtils.EscapeTelegramMarkdownV2LinkUrl(target);
    writer.Write('[');
    writer.Write(label);
    writer.Write("](");
    writer.Write(escapedTarget);
    writer.Write(')');
}

private static void WriteRawHtmlAnchor(TextWriter writer, HtmlNode node, string text)
{
writer.Write("<a");
Expand Down
3 changes: 3 additions & 0 deletions src/ReverseMarkdown/Converters/Br.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ public override void Convert(TextWriter writer, HtmlNode node)
if (Converter.Config.CommonMark) {
writer.WriteLine("\\");
}
else if (Converter.Config.TelegramMarkdownV2) {
writer.WriteLine();
}
else if (Converter.Config.GithubFlavored) {
writer.WriteLine();
}
Expand Down
10 changes: 9 additions & 1 deletion src/ReverseMarkdown/Converters/Code.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System;
using System.IO;
using HtmlAgilityPack;
using ReverseMarkdown.Helpers;


namespace ReverseMarkdown.Converters {
Expand All @@ -12,6 +13,13 @@ public Code(Converter converter) : base(converter)

public override void Convert(TextWriter writer, HtmlNode node)
{
if (Converter.Config.TelegramMarkdownV2) {
writer.Write('`');
writer.Write(StringUtils.EscapeTelegramMarkdownV2Code(DecodeHtml(node.InnerText)));
writer.Write('`');
return;
}

if (Converter.Config.CommonMark) {
var content = node.InnerHtml;
var fence = CreateCommonMarkCodeFence(content);
Expand Down
1 change: 1 addition & 0 deletions src/ReverseMarkdown/Converters/Em.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ public override void Convert(TextWriter writer, HtmlNode node)
: string.Empty;

var emphasis = Converter.Config.SlackFlavored
|| Converter.Config.TelegramMarkdownV2
? "_"
: isCommonMark && Context.AncestorsAny("i")
? "_"
Expand Down
12 changes: 12 additions & 0 deletions src/ReverseMarkdown/Converters/H.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,18 @@ public override void Convert(TextWriter writer, HtmlNode node)
var level = node.Name[1] - '0'; // 'h1' -> 1, 'h2' -> 2, etc.

var content = TreatChildrenAsString(node);
if (Converter.Config.TelegramMarkdownV2) {
writer.WriteLine();
for (var i = 0; i < level; i++) {
writer.Write("\\#");
}

writer.Write(' ');
writer.Write(content);
writer.WriteLine();
return;
}

if (Converter.Config.CommonMark) {
content = content.ReplaceLineEndings("&#10;");
content = EscapeTrailingHashes(content);
Expand Down
7 changes: 7 additions & 0 deletions src/ReverseMarkdown/Converters/Hr.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ public override void Convert(TextWriter writer, HtmlNode node)
throw new SlackUnsupportedTagException(node.Name);
}

if (Converter.Config.TelegramMarkdownV2) {
writer.WriteLine();
writer.Write("\\-\\-\\-");
writer.WriteLine();
return;
}

writer.WriteLine();
writer.Write("* * *");
writer.WriteLine();
Expand Down
29 changes: 29 additions & 0 deletions src/ReverseMarkdown/Converters/Img.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,11 @@ public override void Convert(TextWriter writer, HtmlNode node)
}
}

if (Converter.Config.TelegramMarkdownV2) {
WriteTelegramFallback(writer, alt, src, isBase64Image);
return;
}

if (Converter.Config.CommonMark && altAttribute == null) {
writer.Write(node.OuterHtml);
return;
Expand All @@ -78,6 +83,30 @@ public override void Convert(TextWriter writer, HtmlNode node)
writer.Write(')');
}

/// <summary>
/// Writes the Telegram MarkdownV2 stand-in for an image: a link labelled
/// <c>Image</c> or <c>Image: alt</c>, or only that label when no link can be emitted.
/// </summary>
/// <param name="writer">Destination for the generated markdown.</param>
/// <param name="alt">Alternative text; blank or whitespace selects the bare <c>Image</c> label.</param>
/// <param name="src">Image source used as the link target.</param>
/// <param name="isBase64Image">Whether the caller classified the source as a base64 image.</param>
private void WriteTelegramFallback(TextWriter writer, string alt, string src, bool isBase64Image)
{
    string caption;
    if (string.IsNullOrWhiteSpace(alt)) {
        caption = "Image";
    }
    else {
        caption = "Image: " + alt;
    }

    var escapedCaption = StringUtils.EscapeTelegramMarkdownV2(caption);

    // No link when there is no source, or when the source is a base64 image
    // and the configuration is anything other than SaveToFile.
    var labelOnly = string.IsNullOrEmpty(src)
                    || (isBase64Image
                        && Converter.Config.Base64Images != Config.Base64ImageHandling.SaveToFile);
    if (labelOnly) {
        writer.Write(escapedCaption);
        return;
    }

    var escapedSrc = StringUtils.EscapeTelegramMarkdownV2LinkUrl(src);
    writer.Write('[');
    writer.Write(escapedCaption);
    writer.Write("](");
    writer.Write(escapedSrc);
    writer.Write(')');
}

private string SaveBase64ImageToFile(string base64Src)
{
try
Expand Down
11 changes: 9 additions & 2 deletions src/ReverseMarkdown/Converters/Li.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.IO;
using System.Linq;
using HtmlAgilityPack;
using ReverseMarkdown.Helpers;


namespace ReverseMarkdown.Converters {
Expand Down Expand Up @@ -30,10 +31,16 @@ public override void Convert(TextWriter writer, HtmlNode node)
var start = node.ParentNode.GetAttributeValue("start", 1);
var index = node.ParentNode.SelectNodes("./li").IndexOf(node) + start;
writer.Write(index);
writer.Write(". ");
writer.Write(Converter.Config.TelegramMarkdownV2 ? "\\. " : ". ");
}
else {
writer.Write(Converter.Config.ListBulletChar);
if (Converter.Config.TelegramMarkdownV2) {
writer.Write(StringUtils.EscapeTelegramMarkdownV2(Converter.Config.ListBulletChar.ToString()));
}
else {
writer.Write(Converter.Config.ListBulletChar);
}

writer.Write(' ');
}

Expand Down
8 changes: 6 additions & 2 deletions src/ReverseMarkdown/Converters/Pre.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ public override void Convert(TextWriter writer, HtmlNode node)
return;
}

var isFencedCodeBlock = Converter.Config.GithubFlavored || Converter.Config.CommonMark;
var isTelegram = Converter.Config.TelegramMarkdownV2;
var isFencedCodeBlock = Converter.Config.GithubFlavored || Converter.Config.CommonMark || isTelegram;

var indentation = Converter.Config.CommonMark
var indentation = (Converter.Config.CommonMark || isTelegram)
? string.Empty
: IndentationFor(node);
var contentIndentation = indentation;
Expand All @@ -37,6 +38,9 @@ public override void Convert(TextWriter writer, HtmlNode node)

// content:
var content = DecodeHtml(node.InnerText);
if (isTelegram) {
content = StringUtils.EscapeTelegramMarkdownV2Code(content);
}

if (isFencedCodeBlock) {
var fence = Converter.Config.CommonMark
Expand Down
Loading
Loading