|
| 1 | +using DatabaseBenchmark.DataSources.Parquet; |
| 2 | +using Parquet; |
| 3 | +using Parquet.Data; |
| 4 | +using Parquet.Schema; |
| 5 | +using System; |
| 6 | +using System.IO; |
| 7 | +using System.Threading.Tasks; |
| 8 | +using Xunit; |
| 9 | + |
namespace DatabaseBenchmark.Tests.DataSources
{
    /// <summary>
    /// Tests for <see cref="ParquetDataSource"/> against a Parquet file that
    /// contains more than one row group.
    /// </summary>
    public class ParquetDataSourceTests : IDisposable
    {
        // Number of identical row groups written into the fixture file.
        private const int RowGroupCount = 2;

        // A unique path under the temp directory: a fixed relative name such as
        // "test.parquet" can collide with other tests running in parallel or with a
        // stale file left behind by an aborted run, and it pollutes the working directory.
        private readonly string _testFilePath =
            Path.Combine(Path.GetTempPath(), $"{Guid.NewGuid():N}.parquet");

        /// <summary>
        /// Removes the fixture file created by the test, if it exists.
        /// </summary>
        public void Dispose()
        {
            if (File.Exists(_testFilePath))
            {
                File.Delete(_testFilePath);
            }
        }

        /// <summary>
        /// Verifies that rows are returned in order across both row groups and that
        /// <c>Read</c> returns <c>false</c> once every row has been consumed.
        /// </summary>
        [Fact]
        public async Task ReadValues()
        {
            await CreateTestFile(_testFilePath);

            using var dataSource = new ParquetDataSource(_testFilePath);

            // Every row group holds the same two rows, so the same pair of rows is
            // expected once per row group.
            for (var rowGroup = 0; rowGroup < RowGroupCount; rowGroup++)
            {
                AssertNextRow(dataSource, 1, "One", 10.1);
                AssertNextRow(dataSource, 2, "Two", 20.2);
            }

            // No more rows
            Assert.False(dataSource.Read());
        }

        /// <summary>
        /// Advances <paramref name="dataSource"/> by one row and asserts the values of
        /// the "ArchiveId", "Name" and "Price" columns of that row.
        /// </summary>
        private static void AssertNextRow(ParquetDataSource dataSource, int archiveId, string name, double price)
        {
            Assert.True(dataSource.Read());
            Assert.Equal(archiveId, dataSource.GetValue("ArchiveId"));
            Assert.Equal(name, dataSource.GetValue("Name"));
            Assert.Equal(price, dataSource.GetValue("Price"));
        }

        /// <summary>
        /// Writes a Parquet file at <paramref name="filePath"/> with the columns
        /// ArchiveId (int), Name (string) and Price (double), containing
        /// <see cref="RowGroupCount"/> row groups of two rows each.
        /// </summary>
        /// <param name="filePath">Path of the file to create; an existing file is overwritten.</param>
        private static async Task CreateTestFile(string filePath)
        {
            var schema = new ParquetSchema(
                new DataField<int>("ArchiveId"),
                new DataField<string>("Name"),
                new DataField<double>("Price"));

            var columns = new[]
            {
                new DataColumn(schema.DataFields[0], new int[] { 1, 2 }),
                new DataColumn(schema.DataFields[1], new string[] { "One", "Two" }),
                new DataColumn(schema.DataFields[2], new double[] { 10.1, 20.2 })
            };

            // File.Create truncates an existing file. File.OpenWrite does not, so a
            // longer leftover file would keep stale bytes after the new content.
            using var stream = File.Create(filePath);
            using var writer = await ParquetWriter.CreateAsync(schema, stream);

            for (var rowGroup = 0; rowGroup < RowGroupCount; rowGroup++)
            {
                using var groupWriter = writer.CreateRowGroup();

                // Columns are written in schema order.
                foreach (var column in columns)
                {
                    await groupWriter.WriteColumnAsync(column);
                }
            }
        }
    }
}
0 commit comments