I've come across some unexpected behavior when producing and consuming excel documents using EPPlus (dotnet core, EPPlus version 4.5.3.2, on Windows). When I write a newline as carriage return + line feed but then read it back I only get line feed.
This following code demonstrates the issue. First the output:
String input:
Line1
Line2
Line3
new byte[] { 76, 105, 110, 101, 49, 10, 76, 105, 110, 101, 50, 13, 10, 76, 105, 110, 101, 51 }
String output:
Line1
Line2
Line3
new byte[] { 76, 105, 110, 101, 49, 10, 76, 105, 110, 101, 50, 10, 76, 105, 110, 101, 51 }
Environment newline:
new byte[] { 13, 10 }
Then the code:
using System;
using System.IO;
using System.Linq;
using System.Text;
using OfficeOpenXml;
namespace epplus_newline_issue
{
class Program
{
static void Main(string[] args)
{
var testFile = "test.xlsx";
var testString = "Line1\nLine2\r\nLine3";
File.Delete(testFile);
DebugString("String input", testString);
using (var excelPackage = new ExcelPackage())
{
var ws = excelPackage.Workbook.Worksheets.Add("test");
ws.Cells[1, 1].Value = testString;
File.WriteAllBytes(testFile, excelPackage.GetAsByteArray());
}
using (var stream = File.OpenRead(testFile))
using (var excelPackage = new ExcelPackage(stream))
{
var ws = excelPackage.Workbook.Worksheets.First();
var cellText = ws.Cells[1, 1].Value as string;
DebugString("String output", cellText);
}
DebugString("Environment newline", Environment.NewLine);
}
static void DebugString(string label, string s)
{
Console.WriteLine(label + ":");
Console.WriteLine(s);
Console.WriteLine(PrintBytes(Encoding.UTF8.GetBytes(s)));
}
static string PrintBytes(byte[] byteArray)
{
var sb = new StringBuilder("new byte[] { ");
for (var i = 0; i < byteArray.Length; i++)
{
var b = byteArray[i];
sb.Append(b);
if (i < byteArray.Length - 1)
{
sb.Append(", ");
}
}
sb.Append(" }");
return sb.ToString();
}
}
}
If I unpack the excel zip file and investigate what's inside sharedStrings.xml I can see the difference between lf and crlf - the carriage return is retained there:
$ xxd sharedStrings.xml
00000000: 3c3f 786d 6c20 7665 7273 696f 6e3d 2231 <?xml version="1
00000010: 2e30 2220 656e 636f 6469 6e67 3d22 5554 .0" encoding="UT
00000020: 462d 3822 2073 7461 6e64 616c 6f6e 653d F-8" standalone=
00000030: 2279 6573 2220 3f3e 3c73 7374 2078 6d6c "yes" ?><sst xml
00000040: 6e73 3d22 6874 7470 3a2f 2f73 6368 656d ns="http://schem
00000050: 6173 2e6f 7065 6e78 6d6c 666f 726d 6174 as.openxmlformat
00000060: 732e 6f72 672f 7370 7265 6164 7368 6565 s.org/spreadshee
00000070: 746d 6c2f 3230 3036 2f6d 6169 6e22 2063 tml/2006/main" c
00000080: 6f75 6e74 3d22 3122 2075 6e69 7175 6543 ount="1" uniqueC
00000090: 6f75 6e74 3d22 3122 3e3c 7369 3e3c 7420 ount="1"><si><t
000000a0: 786d 6c3a 7370 6163 653d 2270 7265 7365 xml:space="prese
000000b0: 7276 6522 3e4c 696e 6531 0a4c 696e 6532 rve">Line1.Line2
000000c0: 0d0a 4c69 6e65 333c 2f74 3e3c 2f73 693e ..Line3</t></si>
000000d0: 3c2f 7373 743e </sst>
So my conclusion is that there is an issue with how the file is read/parsed, not how it is written.
I've come across some unexpected behavior when producing and consuming excel documents using EPPlus (dotnet core, EPPlus version 4.5.3.2, on Windows). When I write a newline as carriage return + line feed but then read it back I only get line feed.
This following code demonstrates the issue. First the output:
Then the code:
If I unpack the excel zip file and investigate what's inside sharedStrings.xml I can see the difference between lf and crlf - the carriage return is retained there:
So my conclusion is that there is an issue with how the file is read/parsed, not how it is written.