diff --git a/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs b/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs index fb6138408b..2e62f3ea3e 100644 --- a/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs +++ b/backend/FwLite/FwDataMiniLcmBridge.Tests/MiniLcmTests/SortingTests.cs @@ -1,4 +1,7 @@ +using FwDataMiniLcmBridge.Api; +using FwDataMiniLcmBridge.LcmUtils; using FwDataMiniLcmBridge.Tests.Fixtures; +using MiniLcm.Models; namespace FwDataMiniLcmBridge.Tests.MiniLcmTests; @@ -9,4 +12,42 @@ protected override Task NewApi() { return Task.FromResult(fixture.NewProjectApi("sorting-test", "en", "en")); } + + [Theory] + [InlineData("aaaa", SortField.Headword)] // FTS + [InlineData("a", SortField.Headword)] // non-FTS + [InlineData("aaaa", SortField.SearchRelevance)] // FTS + [InlineData("a", SortField.SearchRelevance)] // non-FTS + public async Task SecondaryOrder_DefaultsToStem(string query, SortField sortField) + { + var otherMorphTypeEntryId = Guid.NewGuid(); + Entry[] expected = [ + new() { Id = otherMorphTypeEntryId, LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Unknown }, // SecondaryOrder defaults to Stem = 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.BoundStem }, // SecondaryOrder = 2 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 6 + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var fwDataApi = (BaseApi as FwDataMiniLcmApi)!; + await fwDataApi.Cache.DoUsingNewOrCurrentUOW("Clear morph type", + "Revert morph type", + () => + { + // the fwdata api doesn't allow creating entries with MorphType.Other or Unknown, so we force it + var otherMorphTypeEntry = fwDataApi.EntriesRepository.GetObject(otherMorphTypeEntryId); + 
otherMorphTypeEntry.LexemeFormOA.MorphTypeRA = null; + return ValueTask.CompletedTask; + }); + + var results = (await Api.SearchEntries(query, new(new(sortField))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } } diff --git a/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs b/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs index 82f7c8c7f0..09d1cb5b71 100644 --- a/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs +++ b/backend/FwLite/FwDataMiniLcmBridge/Api/FwDataMiniLcmApi.cs @@ -652,7 +652,7 @@ private Entry FromLexEntry(ILexEntry entry) { try { - return new Entry + var result = new Entry { Id = entry.Guid, Note = FromLcmMultiString(entry.Comment), @@ -670,6 +670,7 @@ private Entry FromLexEntry(ILexEntry entry) // ILexEntry.PublishIn is a virtual property that inverts DoNotPublishInRC against all publications PublishIn = entry.PublishIn.Select(FromLcmPossibility).ToList(), }; + return result; } catch (Exception e) { @@ -939,12 +940,13 @@ private IEnumerable GetFilteredAndSortedEntries(Func private IEnumerable ApplySorting(SortOptions order, IEnumerable entries, string? query) { var sortWs = GetWritingSystemHandle(order.WritingSystem, WritingSystemType.Vernacular); + var stemSecondaryOrder = MorphTypeRepository.GetObject(MoMorphTypeTags.kguidMorphStem).SecondaryOrder; if (order.Field == SortField.SearchRelevance) { - return entries.ApplyRoughBestMatchOrder(order, sortWs, query); + return entries.ApplyRoughBestMatchOrder(order, sortWs, stemSecondaryOrder, query); } - return order.ApplyOrder(entries, e => e.LexEntryHeadword(sortWs)); + return entries.ApplyHeadwordOrder(order, sortWs, stemSecondaryOrder); } public IAsyncEnumerable SearchEntries(string query, QueryOptions? options = null) @@ -956,7 +958,7 @@ public IAsyncEnumerable SearchEntries(string query, QueryOptions? options private Func? EntrySearchPredicate(string? 
query = null) { if (string.IsNullOrEmpty(query)) return null; - return entry => entry.CitationForm.SearchValue(query) || + return entry => entry.SearchHeadWord(query) || // CitationForm.SearchValue would be redundant entry.LexemeFormOA?.Form.SearchValue(query) is true || entry.AllSenses.Any(s => s.Gloss.SearchValue(query)); } diff --git a/backend/FwLite/FwDataMiniLcmBridge/Api/LcmHelpers.cs b/backend/FwLite/FwDataMiniLcmBridge/Api/LcmHelpers.cs index d3b296cf8e..c5d5b4ae30 100644 --- a/backend/FwLite/FwDataMiniLcmBridge/Api/LcmHelpers.cs +++ b/backend/FwLite/FwDataMiniLcmBridge/Api/LcmHelpers.cs @@ -11,7 +11,7 @@ namespace FwDataMiniLcmBridge.Api; internal static class LcmHelpers { - internal static string? LexEntryHeadword(this ILexEntry entry, int? ws = null) + internal static string? LexEntryHeadword(this ILexEntry entry, int? ws = null, bool applyMorphTokens = true) { var citationFormTs = ws.HasValue ? entry.CitationForm.get_String(ws.Value) @@ -27,7 +27,12 @@ internal static class LcmHelpers : null; var lexemeForm = lexemeFormTs?.Text?.Trim(WhitespaceChars); - return lexemeForm; + if (string.IsNullOrEmpty(lexemeForm) || !applyMorphTokens) return lexemeForm; + + var morphType = entry.LexemeFormOA?.MorphTypeRA; + var leading = morphType?.Prefix ?? ""; + var trailing = morphType?.Postfix ?? ""; + return (leading + lexemeForm + trailing).Trim(WhitespaceChars); } internal static string LexEntryHeadwordOrUnknown(this ILexEntry entry, int? ws = null) @@ -36,6 +41,19 @@ internal static string LexEntryHeadwordOrUnknown(this ILexEntry entry, int? ws = return string.IsNullOrEmpty(headword) ? 
Entry.UnknownHeadword : headword; } + internal static bool SearchHeadWord(this ILexEntry entry, string value) + { + foreach (var ws in entry.Cache.ServiceLocator.WritingSystems.VernacularWritingSystems) + { + var headword = entry.HeadWordForWs(ws.Handle); + if (headword is null) continue; + var text = headword.Text; + if (string.IsNullOrEmpty(text)) continue; + if (text.ContainsDiacriticMatch(value)) return true; + } + return false; + } + internal static bool SearchValue(this ITsMultiString multiString, string value) { for (var i = 0; i < multiString.StringCount; i++) diff --git a/backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs b/backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs index 82b87386f8..6c243f54f4 100644 --- a/backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs +++ b/backend/FwLite/FwDataMiniLcmBridge/Api/Sorting.cs @@ -6,31 +6,55 @@ namespace FwDataMiniLcmBridge.Api; internal static class Sorting { + public static IEnumerable ApplyHeadwordOrder(this IEnumerable entries, SortOptions order, int sortWsHandle, int stemSecondaryOrder) + { + if (order.Ascending) + { + return entries + .OrderBy(e => e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false)) + .ThenBy(e => e.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder) + .ThenBy(e => e.HomographNumber) + .ThenBy(e => e.Id.Guid); + } + else + { + return entries + .OrderByDescending(e => e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false)) + .ThenByDescending(e => e.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder) + .ThenByDescending(e => e.HomographNumber) + .ThenByDescending(e => e.Id.Guid); + } + } + /// /// Rough emulation of FTS search relevance. Headword matches come first, preferring /// prefix matches (e.g. when searching "tan" then "tanan" is before "matan"), then shorter, then alphabetical. /// See also: EntrySearchService.FilterAndRank for the FTS-based equivalent in LcmCrdt. 
/// - public static IEnumerable ApplyRoughBestMatchOrder(this IEnumerable entries, SortOptions order, int sortWsHandle, string? query = null) + public static IEnumerable ApplyRoughBestMatchOrder(this IEnumerable entries, SortOptions order, int sortWsHandle, int stemSecondaryOrder, string? query = null) { - var projected = entries.Select(e => (Entry: e, Headword: e.LexEntryHeadword(sortWsHandle))); + var projected = entries.Select(e => (Entry: e, Headword: e.LexEntryHeadword(sortWsHandle, applyMorphTokens: false))); if (order.Ascending) { return projected - .OrderByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false)) - .ThenByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false)) + .OrderByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false)) + .ThenByDescending(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false)) .ThenBy(x => x.Headword?.Length ?? 0) .ThenBy(x => x.Headword) + .ThenBy(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? stemSecondaryOrder) + .ThenBy(x => x.Entry.HomographNumber) .ThenBy(x => x.Entry.Id.Guid) .Select(x => x.Entry); } else { return projected - .OrderBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false)) - .ThenBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false)) + .OrderBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.StartsWithDiacriticMatch(query!) ?? false)) + .ThenBy(x => !string.IsNullOrEmpty(query) && (x.Headword?.ContainsDiacriticMatch(query!) ?? false)) .ThenByDescending(x => x.Headword?.Length ?? 0) .ThenByDescending(x => x.Headword) + .ThenByDescending(x => x.Entry.PrimaryMorphType?.SecondaryOrder ?? 
stemSecondaryOrder) + .ThenByDescending(x => x.Entry.HomographNumber) .ThenByDescending(x => x.Entry.Id.Guid) .Select(x => x.Entry); } diff --git a/backend/FwLite/FwDataMiniLcmBridge/Api/UpdateProxy/UpdateMorphTypeProxy.cs b/backend/FwLite/FwDataMiniLcmBridge/Api/UpdateProxy/UpdateMorphTypeProxy.cs index 833eb1ffd9..052a07567c 100644 --- a/backend/FwLite/FwDataMiniLcmBridge/Api/UpdateProxy/UpdateMorphTypeProxy.cs +++ b/backend/FwLite/FwDataMiniLcmBridge/Api/UpdateProxy/UpdateMorphTypeProxy.cs @@ -1,3 +1,4 @@ +using System.Diagnostics.CodeAnalysis; using MiniLcm.Models; using SIL.LCModel; @@ -8,11 +9,13 @@ public class UpdateMorphTypeProxy : MorphType private readonly IMoMorphType _lcmMorphType; private readonly FwDataMiniLcmApi _lexboxLcmApi; + [SetsRequiredMembers] public UpdateMorphTypeProxy(IMoMorphType lcmMorphType, FwDataMiniLcmApi lexboxLcmApi) { _lcmMorphType = lcmMorphType; Id = lcmMorphType.Guid; _lexboxLcmApi = lexboxLcmApi; + Kind = LcmHelpers.FromLcmMorphType(lcmMorphType); } public override MultiString Name diff --git a/backend/FwLite/FwLiteProjectSync.Tests/FluentAssertGlobalConfig.cs b/backend/FwLite/FwLiteProjectSync.Tests/FluentAssertGlobalConfig.cs index fa6983d27d..eb883aaa68 100644 --- a/backend/FwLite/FwLiteProjectSync.Tests/FluentAssertGlobalConfig.cs +++ b/backend/FwLite/FwLiteProjectSync.Tests/FluentAssertGlobalConfig.cs @@ -1,3 +1,4 @@ +using System.Runtime.CompilerServices; using FluentAssertions.Extensibility; using FwLiteProjectSync.Tests; @@ -7,6 +8,12 @@ namespace FwLiteProjectSync.Tests; public static class FluentAssertGlobalConfig { + [ModuleInitializer] + internal static void InitVerify() + { + VerifierSettings.OmitContentFromException(); + } + public static void Initialize() { MiniLcm.Tests.FluentAssertGlobalConfig.Initialize(); diff --git a/backend/FwLite/FwLiteProjectSync.Tests/ProjectSnapshotSerializationTests.cs b/backend/FwLite/FwLiteProjectSync.Tests/ProjectSnapshotSerializationTests.cs index 854a505ede..62d34ebd22 100644 --- 
a/backend/FwLite/FwLiteProjectSync.Tests/ProjectSnapshotSerializationTests.cs +++ b/backend/FwLite/FwLiteProjectSync.Tests/ProjectSnapshotSerializationTests.cs @@ -54,6 +54,7 @@ public async Task AssertSena3Snapshots(string sourceSnapshotName) } [Fact] + [Trait("Category", "Verified")] public async Task LatestSena3SnapshotRoundTrips() { // arrange diff --git a/backend/FwLite/FwLiteProjectSync.Tests/Sena3SyncTests.cs b/backend/FwLite/FwLiteProjectSync.Tests/Sena3SyncTests.cs index 8acdcf5f9e..1c2fb60753 100644 --- a/backend/FwLite/FwLiteProjectSync.Tests/Sena3SyncTests.cs +++ b/backend/FwLite/FwLiteProjectSync.Tests/Sena3SyncTests.cs @@ -96,6 +96,37 @@ private async Task WorkaroundMissingWritingSystems() } + [Fact] + [Trait("Category", "Integration")] + public async Task CanonicalMorphTypes_MatchFwDataMorphTypes() + { + var fwDataMorphTypes = await _fwDataApi.GetMorphTypes().ToArrayAsync(); + fwDataMorphTypes.Should().NotBeEmpty("Sena 3 should have morph types"); + + // Verify every FwData morph type has a matching canonical entry + foreach (var fwMorphType in fwDataMorphTypes) + { + if (fwMorphType.Kind == MorphTypeKind.Unknown) + continue; + + CanonicalMorphTypes.All.Should().ContainKey(fwMorphType.Kind, + $"canonical morph types should include {fwMorphType.Kind}"); + var canonical = CanonicalMorphTypes.All[fwMorphType.Kind]; + canonical.Id.Should().Be(fwMorphType.Id, $"GUID for {fwMorphType.Kind} should match FwData"); + canonical.Prefix.Should().Be(fwMorphType.Prefix, $"Prefix for {fwMorphType.Kind} should match FwData"); + canonical.Postfix.Should().Be(fwMorphType.Postfix, $"Postfix for {fwMorphType.Kind} should match FwData"); + canonical.SecondaryOrder.Should().Be(fwMorphType.SecondaryOrder, $"SecondaryOrder for {fwMorphType.Kind} should match FwData"); + } + + // Verify every canonical morph type exists in FwData (no extras we shouldn't have) + var fwDataKinds = fwDataMorphTypes + .Where(m => m.Kind != MorphTypeKind.Unknown) + .Select(m => m.Kind) + 
.ToHashSet(); + CanonicalMorphTypes.All.Keys.Should().BeSubsetOf(fwDataKinds, + "every canonical morph type should exist in the Sena 3 FwData project"); + } + [Fact] [Trait("Category", "Integration")] public async Task DryRunImport_MakesNoChanges() @@ -207,6 +238,7 @@ public async Task SecondSena3SyncDoesNothing() /// [Fact] [Trait("Category", "Integration")] + [Trait("Category", "Verified")] public async Task LiveSena3Sync() { // arrange - put "live" crdt db and fw-headless snapshot in place diff --git a/backend/FwLite/FwLiteProjectSync.Tests/SyncTests.cs b/backend/FwLite/FwLiteProjectSync.Tests/SyncTests.cs index 5e9928e761..12d9fd80f5 100644 --- a/backend/FwLite/FwLiteProjectSync.Tests/SyncTests.cs +++ b/backend/FwLite/FwLiteProjectSync.Tests/SyncTests.cs @@ -702,4 +702,32 @@ public async Task CanCreateAComplexFormTypeAndSyncsIt() _fixture.FwDataApi.GetComplexFormTypes().ToBlockingEnumerable().Should().ContainEquivalentOf(complexFormEntry); } + + [Fact] + [Trait("Category", "Integration")] + public async Task SyncWithLegacySnapshot_EmptyMorphTypes_DoesNotDuplicate() + { + var crdtApi = _fixture.CrdtApi; + var fwdataApi = _fixture.FwDataApi; + + // First sync: import so both sides have data + await _syncService.Import(crdtApi, fwdataApi); + var snapshot = await _fixture.RegenerateAndGetSnapshot(); + + // Simulate a legacy snapshot by clearing MorphTypes + var legacySnapshot = snapshot with { MorphTypes = [] }; + + // The CRDT should already have morph types (from seeding in MigrateDb). + // Syncing with a legacy snapshot should patch the snapshot and not duplicate morph types. 
+ var syncResult = await _syncService.Sync(crdtApi, fwdataApi, legacySnapshot); + + // Verify no duplicates + var crdtMorphTypes = await crdtApi.GetMorphTypes().ToArrayAsync(); + crdtMorphTypes.Should().OnlyHaveUniqueItems(mt => mt.Kind); + crdtMorphTypes.Should().NotBeEmpty(); + + // Verify no morph-type changes were needed (they were patched from CRDT) + syncResult.CrdtChanges.Should().Be(0); + syncResult.FwdataChanges.Should().Be(0); + } } diff --git a/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs b/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs index ed729a7045..5bf4fd967a 100644 --- a/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs +++ b/backend/FwLite/FwLiteProjectSync/CrdtFwdataProjectSyncService.cs @@ -76,6 +76,18 @@ private async Task SyncOrImportInternal(IMiniLcmApi crdtApi, IMiniLc { // Repair any missing translation IDs before doing the full sync, so the sync doesn't have to deal with them var syncedIdCount = await CrdtRepairs.SyncMissingTranslationIds(projectSnapshot.Entries, fwdata, crdt, dryRun); + + // Patch legacy snapshots that were created before morph-type support. + // After seeding, the CRDT has morph-types but the snapshot still has []. + // Without this patch, the diff would see all morph-types as "new" and try to re-add them. 
+ if (projectSnapshot.MorphTypes.Length == 0) + { + var currentCrdtMorphTypes = await crdt.GetMorphTypes().ToArrayAsync(); + if (currentCrdtMorphTypes.Length > 0) + { + projectSnapshot = projectSnapshot with { MorphTypes = currentCrdtMorphTypes }; + } + } } var syncResult = projectSnapshot is null diff --git a/backend/FwLite/LcmCrdt.Tests/Changes/ChangeSerializationTests.cs b/backend/FwLite/LcmCrdt.Tests/Changes/ChangeSerializationTests.cs index a35e467e45..894c0c5937 100644 --- a/backend/FwLite/LcmCrdt.Tests/Changes/ChangeSerializationTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/Changes/ChangeSerializationTests.cs @@ -124,6 +124,7 @@ public void CanDeserializeLegacyRegressionData() } [Fact] + [Trait("Category", "Verified")] public async Task RegressionDataUpToDate() { var legacyJsonArray = ReadJsonArrayFromFile(GetJsonFilePath("ChangeDeserializationRegressionData.legacy.verified.txt")); diff --git a/backend/FwLite/LcmCrdt.Tests/Data/FilteringTests.cs b/backend/FwLite/LcmCrdt.Tests/Data/FilteringTests.cs index 9fcac6de49..505a20b999 100644 --- a/backend/FwLite/LcmCrdt.Tests/Data/FilteringTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/Data/FilteringTests.cs @@ -1,19 +1,20 @@ using LcmCrdt.Data; -using MiniLcm.Models; namespace LcmCrdt.Tests.Data; public class FilteringTests { private readonly List _entries; + private readonly IQueryable _morphTypes; public FilteringTests() { _entries = [ new Entry { LexemeForm = { { "en", "123" } }, }, - new Entry { LexemeForm = { { "en", "456" } }, } + new Entry { LexemeForm = { { "en", "456" } }, }, ]; + _morphTypes = CanonicalMorphTypes.All.Values.ToArray().AsQueryable(); } [Theory] @@ -36,7 +37,7 @@ public void WhereExemplar_CompiledFilter_ShouldReturnSameResults(string exemplar [InlineData("9")] public void SearchFilter_CompiledFilter_ShouldReturnSameResults(string query) { - var expected = _entries.AsQueryable().Where(Filtering.SearchFilter(query)).ToList(); + var expected = Filtering.SearchFilter(_entries.AsQueryable(), 
_morphTypes, query).ToList(); var actual = _entries.Where(Filtering.CompiledFilter(query, "en", null)).ToList(); @@ -52,9 +53,8 @@ public void CombinedFilter_CompiledFilter_ShouldReturnSameResults(string exempla { WritingSystemId ws = "en"; - var expected = _entries.AsQueryable() - .WhereExemplar(ws, exemplar) - .Where(Filtering.SearchFilter(query)) + var expected = Filtering.SearchFilter( + _entries.AsQueryable().WhereExemplar(ws, exemplar), _morphTypes, query) .ToList(); var actual = _entries.Where(Filtering.CompiledFilter(query, ws, exemplar)).ToList(); diff --git a/backend/FwLite/LcmCrdt.Tests/Data/MigrationTests.cs b/backend/FwLite/LcmCrdt.Tests/Data/MigrationTests.cs index e694d22086..b8c985e6c4 100644 --- a/backend/FwLite/LcmCrdt.Tests/Data/MigrationTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/Data/MigrationTests.cs @@ -22,6 +22,7 @@ public class MigrationTests : IAsyncLifetime internal static void Init() { VerifySystemJson.Initialize(); + VerifierSettings.OmitContentFromException(); } public Task InitializeAsync() @@ -54,6 +55,7 @@ public async Task GetEntries_WorksAfterMigrationFromScriptedDb(RegressionTestHel [Theory] [InlineData(RegressionTestHelper.RegressionVersion.v1)] [InlineData(RegressionTestHelper.RegressionVersion.v2)] + [Trait("Category", "Verified")] public async Task VerifyAfterMigrationFromScriptedDb(RegressionTestHelper.RegressionVersion regressionVersion) { await _helper.InitializeAsync(regressionVersion); @@ -105,6 +107,7 @@ await Task.WhenAll( [Theory] [InlineData(RegressionTestHelper.RegressionVersion.v1)] [InlineData(RegressionTestHelper.RegressionVersion.v2)] + [Trait("Category", "Verified")] public async Task VerifyRegeneratedSnapshotsAfterMigrationFromScriptedDb(RegressionTestHelper.RegressionVersion regressionVersion) { await _helper.InitializeAsync(regressionVersion); diff --git a/backend/FwLite/LcmCrdt.Tests/Data/RegressionTestHelper.cs b/backend/FwLite/LcmCrdt.Tests/Data/RegressionTestHelper.cs index 412b433c9d..ff40d0c591 100644 
--- a/backend/FwLite/LcmCrdt.Tests/Data/RegressionTestHelper.cs +++ b/backend/FwLite/LcmCrdt.Tests/Data/RegressionTestHelper.cs @@ -73,6 +73,7 @@ private static string GetFilePath(string name, [CallerFilePath] string sourceFil public enum RegressionVersion { v1, - v2 + v2, + v3 } } diff --git a/backend/FwLite/LcmCrdt.Tests/Data/SnapshotDeserializationTests.cs b/backend/FwLite/LcmCrdt.Tests/Data/SnapshotDeserializationTests.cs index cd429f9330..e6ee0f894a 100644 --- a/backend/FwLite/LcmCrdt.Tests/Data/SnapshotDeserializationTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/Data/SnapshotDeserializationTests.cs @@ -91,6 +91,7 @@ public void CanDeserializeLegacyRegressionData() } [Fact] + [Trait("Category", "Verified")] public async Task RegressionDataUpToDate() { var legacyJsonArray = ReadJsonArrayFromFile(GetJsonFilePath("SnapshotDeserializationRegressionData.legacy.verified.txt")); diff --git a/backend/FwLite/LcmCrdt.Tests/DataModelSnapshotTests.cs b/backend/FwLite/LcmCrdt.Tests/DataModelSnapshotTests.cs index e19f884526..a12a8ed43a 100644 --- a/backend/FwLite/LcmCrdt.Tests/DataModelSnapshotTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/DataModelSnapshotTests.cs @@ -54,24 +54,28 @@ public async Task DisposeAsync() } [Fact] + [Trait("Category", "Verified")] public async Task VerifyDbModel() { await Verify(_crdtDbContext.Model.ToDebugString(MetadataDebugStringOptions.LongDefault)); } [Fact] + [Trait("Category", "Verified")] public async Task VerifyChangeModels() { await Verify(_jsonSerializerOptions.GetTypeInfo(typeof(IChange)).PolymorphismOptions); } [Fact] + [Trait("Category", "Verified")] public async Task VerifyIObjectBaseModels() { await Verify(_jsonSerializerOptions.GetTypeInfo(typeof(IObjectBase)).PolymorphismOptions); } [Fact] + [Trait("Category", "Verified")] public async Task VerifyIObjectWithIdModels() { await Verify(_jsonSerializerOptions.GetTypeInfo(typeof(IObjectWithId)).PolymorphismOptions); diff --git 
a/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.cs b/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.cs index 4ad4984328..50130bb9e5 100644 --- a/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/FullTextSearch/EntrySearchServiceTests.cs @@ -81,6 +81,7 @@ public async Task UpdateEntrySearchTableEnumerable_DoesNotCreateDuplicates() } [Fact] + [Trait("Category", "Verified")] public async Task SearchTableIsUpdatedAutomaticallyOnInsert() { var id = Guid.NewGuid(); @@ -117,6 +118,7 @@ public async Task SearchTableIsUpdatedAutomaticallyOnInsert() } [Fact] + [Trait("Category", "Verified")] public async Task SearchTableIsUpdatedAutomaticallyOnUpdate() { var id = Guid.NewGuid(); diff --git a/backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs b/backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs index 0f5abb8e73..1f0089f470 100644 --- a/backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs +++ b/backend/FwLite/LcmCrdt.Tests/MiniLcmTests/SortingTests.cs @@ -16,4 +16,30 @@ public override async Task DisposeAsync() await base.DisposeAsync(); await _fixture.DisposeAsync(); } + + [Theory] + [InlineData("aaaa", SortField.Headword)] // FTS + [InlineData("a", SortField.Headword)] // non-FTS + [InlineData("aaaa", SortField.SearchRelevance)] // FTS + [InlineData("a", SortField.SearchRelevance)] // non-FTS + public async Task SecondaryOrder_DefaultsToStem(string query, SortField sortField) + { + Entry[] expected = [ + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Unknown }, // SecondaryOrder defaults to Stem = 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.BoundStem }, // SecondaryOrder = 2 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 6 + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var 
entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(query, new(new(sortField))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } } diff --git a/backend/FwLite/LcmCrdt.Tests/MorphTypeSeedingTests.cs b/backend/FwLite/LcmCrdt.Tests/MorphTypeSeedingTests.cs new file mode 100644 index 0000000000..225b93b2d1 --- /dev/null +++ b/backend/FwLite/LcmCrdt.Tests/MorphTypeSeedingTests.cs @@ -0,0 +1,140 @@ +using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Hosting; +using MiniLcm.Models; +using static LcmCrdt.CrdtProjectsService; + +namespace LcmCrdt.Tests; + +public class MorphTypeSeedingTests +{ + [Fact] + public async Task NewProjectWithSeedData_HasAllCanonicalMorphTypes() + { + var sqliteFile = "MorphTypeSeed_NewProject.sqlite"; + if (File.Exists(sqliteFile)) File.Delete(sqliteFile); + var builder = Host.CreateEmptyApplicationBuilder(null); + builder.Services.AddTestLcmCrdtClient(); + using var host = builder.Build(); + await using var scope = host.Services.CreateAsyncScope(); + + var crdtProjectsService = scope.ServiceProvider.GetRequiredService(); + var crdtProject = await crdtProjectsService.CreateProject(new( + Name: "MorphTypeSeedTest", + Code: "morph-type-seed-test", + Path: "", + SeedNewProjectData: true)); + + var api = (CrdtMiniLcmApi)await scope.ServiceProvider.OpenCrdtProject(crdtProject); + var morphTypes = await api.GetMorphTypes().ToArrayAsync(); + + morphTypes.Should().HaveCount(CanonicalMorphTypes.All.Count); + foreach (var canonical in CanonicalMorphTypes.All.Values) + { + var mt = morphTypes.Should().ContainSingle(m => m.Kind == canonical.Kind).Subject; + mt.Id.Should().Be(canonical.Id); + mt.Name["en"].Should().Be(canonical.Name["en"]); + mt.Abbreviation["en"].Should().Be(canonical.Abbreviation["en"]); + 
mt.Prefix.Should().Be(canonical.Prefix); + mt.Postfix.Should().Be(canonical.Postfix); + mt.SecondaryOrder.Should().Be(canonical.SecondaryOrder); + } + + await using var dbContext = await scope.ServiceProvider.GetRequiredService>().CreateDbContextAsync(); + await dbContext.Database.EnsureDeletedAsync(); + } + + [Fact] + public async Task ExistingProjectWithoutMorphTypes_GetsMorphTypesOnOpen() + { + var sqliteFile = "MorphTypeSeed_ExistingProject.sqlite"; + if (File.Exists(sqliteFile)) File.Delete(sqliteFile); + var builder = Host.CreateEmptyApplicationBuilder(null); + builder.Services.AddTestLcmCrdtClient(); + using var host = builder.Build(); + await using var scope = host.Services.CreateAsyncScope(); + + var crdtProjectsService = scope.ServiceProvider.GetRequiredService(); + // Create project WITHOUT seeding + var crdtProject = await crdtProjectsService.CreateProject(new( + Name: "MorphTypeSeedExisting", + Code: "morph-type-seed-existing", + Path: "", + SeedNewProjectData: false)); + + // Opening the project triggers MigrateDb, which seeds morph types if missing + var api = (CrdtMiniLcmApi)await scope.ServiceProvider.OpenCrdtProject(crdtProject); + var morphTypes = await api.GetMorphTypes().ToArrayAsync(); + + morphTypes.Should().HaveCount(CanonicalMorphTypes.All.Count); + + await using var dbContext = await scope.ServiceProvider.GetRequiredService>().CreateDbContextAsync(); + await dbContext.Database.EnsureDeletedAsync(); + } + + [Fact] + public async Task SeedingIsIdempotent_OpeningProjectTwiceDoesNotDuplicate() + { + var sqliteFile = "MorphTypeSeed_Idempotent.sqlite"; + if (File.Exists(sqliteFile)) File.Delete(sqliteFile); + var builder = Host.CreateEmptyApplicationBuilder(null); + builder.Services.AddTestLcmCrdtClient(); + using var host = builder.Build(); + + // First open: seed morph types + { + await using var scope = host.Services.CreateAsyncScope(); + var crdtProjectsService = scope.ServiceProvider.GetRequiredService(); + var crdtProject = await 
crdtProjectsService.CreateProject(new( + Name: "MorphTypeSeedIdempotent", + Code: "morph-type-seed-idempotent", + Path: "", + SeedNewProjectData: true)); + await scope.ServiceProvider.OpenCrdtProject(crdtProject); + } + + // Second open: MigrateDb should detect existing morph types and skip seeding + // Note: MigrationTasks is static, so we would need to clear it to re-trigger MigrateDb, which this test does not do. + // In production, this doesn't happen (each process lifetime runs once). + // Instead, we verify by count that the seeding itself is duplicate-safe. + { + await using var scope = host.Services.CreateAsyncScope(); + var api = scope.ServiceProvider.GetRequiredService(); + var morphTypes = await api.GetMorphTypes().ToArrayAsync(); + morphTypes.Should().HaveCount(CanonicalMorphTypes.All.Count, + "morph types should not be duplicated"); + } + + await using var cleanupScope = host.Services.CreateAsyncScope(); + await using var dbContext = await cleanupScope.ServiceProvider.GetRequiredService>().CreateDbContextAsync(); + await dbContext.Database.EnsureDeletedAsync(); + } + + [Fact] + public void CanonicalMorphTypes_CoverAllKindsExceptUnknown() + { + var allKinds = Enum.GetValues() + .Where(k => k != MorphTypeKind.Unknown) + .ToHashSet(); + + CanonicalMorphTypes.All.Keys.Should().BeEquivalentTo(allKinds); + } + + [Fact] + public void CanonicalMorphTypes_HaveUniqueIds() + { + var ids = CanonicalMorphTypes.All.Values.Select(m => m.Id).ToList(); + ids.Should().OnlyHaveUniqueItems(); + } + + [Fact] + public void CanonicalMorphTypes_HaveRequiredFields() + { + foreach (var mt in CanonicalMorphTypes.All.Values) + { + mt.Id.Should().NotBe(Guid.Empty, $"MorphType {mt.Kind} should have a non-empty Id"); + mt.Name["en"].Should().NotBeNullOrWhiteSpace($"MorphType {mt.Kind} should have an English name"); + mt.Abbreviation["en"].Should().NotBeNullOrWhiteSpace($"MorphType {mt.Kind} should have an English abbreviation"); + } + } +} diff --git a/backend/FwLite/LcmCrdt/CrdtProjectsService.cs 
b/backend/FwLite/LcmCrdt/CrdtProjectsService.cs index 918d47cc78..2e9ab5af24 100644 --- a/backend/FwLite/LcmCrdt/CrdtProjectsService.cs +++ b/backend/FwLite/LcmCrdt/CrdtProjectsService.cs @@ -244,6 +244,7 @@ internal static async Task SeedSystemData(DataModel dataModel, Guid clientId) await PreDefinedData.PredefinedComplexFormTypes(dataModel, clientId); await PreDefinedData.PredefinedPartsOfSpeech(dataModel, clientId); await PreDefinedData.PredefinedSemanticDomains(dataModel, clientId); + await PreDefinedData.PredefinedMorphTypes(dataModel, clientId); } [GeneratedRegex("^[a-zA-Z0-9][a-zA-Z0-9-_]+$")] diff --git a/backend/FwLite/LcmCrdt/CurrentProjectService.cs b/backend/FwLite/LcmCrdt/CurrentProjectService.cs index a31a6f7ed2..5096895afd 100644 --- a/backend/FwLite/LcmCrdt/CurrentProjectService.cs +++ b/backend/FwLite/LcmCrdt/CurrentProjectService.cs @@ -1,10 +1,12 @@ using System.Collections.Concurrent; using LcmCrdt.FullTextSearch; +using LcmCrdt.Objects; using LcmCrdt.Project; using Microsoft.EntityFrameworkCore; using Microsoft.Extensions.Caching.Memory; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; +using SIL.Harmony; namespace LcmCrdt; @@ -105,6 +107,16 @@ async Task Execute() { await using var dbContext = await DbContextFactory.CreateDbContextAsync(); await dbContext.Database.MigrateAsync(); + + // Seed morph-types if missing (for existing projects created before morph-type support). + // Must happen BEFORE FTS regeneration so headwords include morph-type tokens. 
+ if (!await dbContext.MorphTypes.AnyAsync()) + { + var dataModel = services.GetRequiredService(); + var projectData = await dbContext.ProjectData.AsNoTracking().FirstAsync(); + await PreDefinedData.PredefinedMorphTypes(dataModel, projectData.ClientId); + } + if (EntrySearchServiceFactory is not null) { await using var ess = EntrySearchServiceFactory.CreateSearchService(dbContext); diff --git a/backend/FwLite/LcmCrdt/Data/EntryQueryHelpers.cs b/backend/FwLite/LcmCrdt/Data/EntryQueryHelpers.cs index fdbddb4264..d55d1b8346 100644 --- a/backend/FwLite/LcmCrdt/Data/EntryQueryHelpers.cs +++ b/backend/FwLite/LcmCrdt/Data/EntryQueryHelpers.cs @@ -17,4 +17,79 @@ public static string Headword(this Entry e, WritingSystemId ws) (e, ws) => (string.IsNullOrEmpty(Json.Value(e.CitationForm, ms => ms[ws])) ? Json.Value(e.LexemeForm, ms => ms[ws]) : Json.Value(e.CitationForm, ms => ms[ws]))!.Trim(); + + [ExpressionMethod(nameof(HeadwordWithTokensExpression))] + public static string HeadwordWithTokens(this Entry e, WritingSystemId ws, string? leading, string? trailing) + { + var citation = e.CitationForm[ws]; + if (!string.IsNullOrEmpty(citation)) return citation.Trim(); + var lexeme = e.LexemeForm[ws]; + if (string.IsNullOrEmpty(lexeme)) return string.Empty; + return ((leading ?? "") + lexeme + (trailing ?? "")).Trim(); + } + + private static Expression> HeadwordWithTokensExpression() => + (e, ws, leading, trailing) => + string.IsNullOrEmpty(Json.Value(e.CitationForm, ms => ms[ws])) + ? string.IsNullOrEmpty(Json.Value(e.LexemeForm, ms => ms[ws])) + ? "" + : ((leading ?? "") + Json.Value(e.LexemeForm, ms => ms[ws]) + (trailing ?? "")).Trim() + : Json.Value(e.CitationForm, ms => ms[ws])!.Trim(); + + [ExpressionMethod(nameof(SearchHeadwords))] + public static bool SearchHeadwords(this Entry e, string? leading, string? 
trailing, string query) + { + return e.CitationForm.SearchValue(query) + || e.LexemeForm.Values.Any(kvp => + string.IsNullOrEmpty(e.CitationForm[kvp.Key]) && + SqlHelpers.ContainsIgnoreCaseAccents((leading ?? "") + kvp.Value + (trailing ?? ""), query)); + } + + private static Expression> SearchHeadwords() + { + return (e, leading, trailing, query) => + Json.QueryValues(e.CitationForm).Any( + v => SqlHelpers.ContainsIgnoreCaseAccents(v, query)) || + Json.QueryEntries(e.LexemeForm).Any(kv => + string.IsNullOrEmpty(Json.Value(e.CitationForm, ms => ms[kv.Key])) && + SqlHelpers.ContainsIgnoreCaseAccents((leading ?? "") + kv.Value + (trailing ?? ""), query)); + } + + + /// + /// Computes headwords for all writing systems present in CitationForm or LexemeForm, + /// applying morph tokens when CitationForm is absent. + /// Used for in-memory population of Entry.Headword after loading from DB. + /// + public static MultiString ComputeHeadwords(Entry entry, + IReadOnlyDictionary morphTypeDataLookup) + { + var result = new MultiString(); + morphTypeDataLookup.TryGetValue(entry.MorphType, out var morphData); + + // Iterate all WS keys that have data, not just "current" vernacular WSs, + // so we don't lose headwords for non-current or future writing systems. + var wsIds = entry.CitationForm.Values.Keys + .Union(entry.LexemeForm.Values.Keys); + + foreach (var wsId in wsIds) + { + var citation = entry.CitationForm[wsId]; + if (!string.IsNullOrEmpty(citation)) + { + result[wsId] = citation.Trim(); + continue; + } + + var lexeme = entry.LexemeForm[wsId]; + if (!string.IsNullOrEmpty(lexeme)) + { + var leading = morphData?.Prefix ?? ""; + var trailing = morphData?.Postfix ?? 
""; + result[wsId] = (leading + lexeme + trailing).Trim(); + } + } + + return result; + } } diff --git a/backend/FwLite/LcmCrdt/Data/Filtering.cs b/backend/FwLite/LcmCrdt/Data/Filtering.cs index d2fc9b45cd..638dc29742 100644 --- a/backend/FwLite/LcmCrdt/Data/Filtering.cs +++ b/backend/FwLite/LcmCrdt/Data/Filtering.cs @@ -15,11 +15,15 @@ public static IQueryable WhereExemplar( return query.Where(e => e.Headword(ws).StartsWith(exemplar)); } - public static Expression> SearchFilter(string query) + public static IQueryable SearchFilter(IQueryable entries, IQueryable morphTypes, string query) { - return e => e.LexemeForm.SearchValue(query) - || e.CitationForm.SearchValue(query) - || e.Senses.Any(s => s.Gloss.SearchValue(query)); + return from entry in entries + join mt in morphTypes on entry.MorphType equals mt.Kind into mtGroup + from mt in mtGroup.DefaultIfEmpty() + where entry.SearchHeadwords(mt.Prefix, mt.Postfix, query) // CitationForm.SearchValue would be redundant + || entry.LexemeForm.SearchValue(query) + || entry.Senses.Any(s => s.Gloss.SearchValue(query)) + select entry; } public static Expression> FtsFilter(string query, IQueryable diff --git a/backend/FwLite/LcmCrdt/Data/MiniLcmRepository.cs b/backend/FwLite/LcmCrdt/Data/MiniLcmRepository.cs index 44a92673a4..e4b35ad95b 100644 --- a/backend/FwLite/LcmCrdt/Data/MiniLcmRepository.cs +++ b/backend/FwLite/LcmCrdt/Data/MiniLcmRepository.cs @@ -72,6 +72,7 @@ public void Dispose() public IQueryable WritingSystemsOrdered => dbContext.WritingSystemsOrdered; public IQueryable SemanticDomains => dbContext.SemanticDomains; public IQueryable PartsOfSpeech => dbContext.PartsOfSpeech; + public IQueryable Publications => dbContext.Publications; @@ -200,20 +201,24 @@ private async Task EnsureWritingSystemIsPopulated(QueryOptions que { if (SearchService is not null && SearchService.ValidSearchTerm(query)) { + var morphTypes = await dbContext.MorphTypes.ToArrayAsyncEF(); if (sortOptions is not null && sortOptions.Field == 
SortField.SearchRelevance) { //ranking must be done at the same time as part of the full-text search, so we can't use normal sorting sortingHandled = true; - queryable = SearchService.FilterAndRank(queryable, query, sortOptions.WritingSystem); + queryable = SearchService.FilterAndRank(queryable, query, sortOptions.WritingSystem, morphTypes); } else { - queryable = SearchService.Filter(queryable, query); + var filterWs = sortOptions?.WritingSystem + ?? (await GetWritingSystem(default, WritingSystemType.Vernacular))?.WsId + ?? default; + queryable = SearchService.Filter(queryable, query, filterWs, morphTypes); } } else { - queryable = queryable.Where(Filtering.SearchFilter(query)); + queryable = Filtering.SearchFilter(queryable, dbContext.GetTable(), query); } } @@ -225,12 +230,10 @@ private ValueTask> ApplySorting(IQueryable queryable, Q if (options.Order.WritingSystem == default) throw new ArgumentException("Sorting writing system must be specified", nameof(options)); - var wsId = options.Order.WritingSystem; - IQueryable result = options.Order.Field switch + var result = options.Order.Field switch { - SortField.SearchRelevance => queryable.ApplyRoughBestMatchOrder(options.Order, query), - SortField.Headword => - options.ApplyOrder(queryable, e => e.Headword(wsId).CollateUnicode(wsId)).ThenBy(e => e.Id), + SortField.SearchRelevance => queryable.ApplyRoughBestMatchOrder(dbContext.GetTable(), options.Order, query), + SortField.Headword => queryable.ApplyHeadwordOrder(dbContext.GetTable(), options.Order), _ => throw new ArgumentOutOfRangeException(nameof(options), "sort field unknown " + options.Order.Field) }; return new ValueTask>(result); diff --git a/backend/FwLite/LcmCrdt/Data/Sorting.cs b/backend/FwLite/LcmCrdt/Data/Sorting.cs index 957fd4b978..acb0690737 100644 --- a/backend/FwLite/LcmCrdt/Data/Sorting.cs +++ b/backend/FwLite/LcmCrdt/Data/Sorting.cs @@ -1,31 +1,77 @@ +using LinqToDB; + namespace LcmCrdt.Data; public static class Sorting { + public static 
IQueryable ApplyHeadwordOrder(this IQueryable entries, ITable morphTypes, SortOptions order, string? query = null) + { + var stemOrder = morphTypes.Where(m => m.Kind == MorphTypeKind.Stem).Select(m => m.SecondaryOrder); + if (order.Ascending) + { + return + from entry in entries + orderby + entry.Headword(order.WritingSystem).CollateUnicode(order.WritingSystem), + morphTypes.Where(m => m.Kind == entry.MorphType) + .Select(m => (int?)m.SecondaryOrder).FirstOrDefault() ?? stemOrder.FirstOrDefault(), + // entry.HomographNumber, + entry.Id + select entry; + } + else + { + return + from entry in entries + orderby + entry.Headword(order.WritingSystem).CollateUnicode(order.WritingSystem) descending, + (morphTypes.Where(m => m.Kind == entry.MorphType) + .Select(m => (int?)m.SecondaryOrder).FirstOrDefault() ?? stemOrder.FirstOrDefault()) descending, + // entry.HomographNumber descending, + entry.Id descending + select entry; + } + } + /// /// Rough search relevance for when FTS is unavailable. Headword matches come first, preferring /// prefix matches (e.g. when searching "tan" then "tanan" is before "matan"), then shorter, then alphabetical. /// See also: for the FTS-based equivalent. /// - public static IQueryable ApplyRoughBestMatchOrder(this IQueryable entries, SortOptions order, string? query = null) + public static IQueryable ApplyRoughBestMatchOrder(this IQueryable entries, ITable morphTypes, SortOptions order, string? 
query = null) { + var stemOrder = morphTypes.Where(m => m.Kind == MorphTypeKind.Stem).Select(m => m.SecondaryOrder); if (order.Ascending) { - return entries - .OrderByDescending(e => !string.IsNullOrEmpty(query) && SqlHelpers.ContainsIgnoreCaseAccents(e.Headword(order.WritingSystem), query!)) - .ThenByDescending(e => !string.IsNullOrEmpty(query) && SqlHelpers.StartsWithIgnoreCaseAccents(e.Headword(order.WritingSystem), query!)) - .ThenBy(e => e.Headword(order.WritingSystem).Length) - .ThenBy(e => e.Headword(order.WritingSystem)) - .ThenBy(e => e.Id); + return + from e in entries + join mt in morphTypes on e.MorphType equals mt.Kind into mtGroup + from mt in mtGroup.DefaultIfEmpty() + orderby + !string.IsNullOrEmpty(query) && SqlHelpers.StartsWithIgnoreCaseAccents(e.Headword(order.WritingSystem), query!) descending, + !string.IsNullOrEmpty(query) && SqlHelpers.ContainsIgnoreCaseAccents(e.Headword(order.WritingSystem), query!) descending, + e.Headword(order.WritingSystem).Length, + e.Headword(order.WritingSystem), + mt != null ? 
mt.SecondaryOrder : stemOrder.FirstOrDefault(), + // e.HomographNumber, + e.Id + select e; } else { - return entries - .OrderBy(e => !string.IsNullOrEmpty(query) && SqlHelpers.ContainsIgnoreCaseAccents(e.Headword(order.WritingSystem), query!)) - .ThenBy(e => !string.IsNullOrEmpty(query) && SqlHelpers.StartsWithIgnoreCaseAccents(e.Headword(order.WritingSystem), query!)) - .ThenByDescending(e => e.Headword(order.WritingSystem).Length) - .ThenByDescending(e => e.Headword(order.WritingSystem)) - .ThenByDescending(e => e.Id); + return + from e in entries + join mt in morphTypes on e.MorphType equals mt.Kind into mtGroup + from mt in mtGroup.DefaultIfEmpty() + orderby + !string.IsNullOrEmpty(query) && SqlHelpers.StartsWithIgnoreCaseAccents(e.Headword(order.WritingSystem), query!), + !string.IsNullOrEmpty(query) && SqlHelpers.ContainsIgnoreCaseAccents(e.Headword(order.WritingSystem), query!), + e.Headword(order.WritingSystem).Length descending, + e.Headword(order.WritingSystem) descending, + (mt != null ? 
mt.SecondaryOrder : stemOrder.FirstOrDefault()) descending, + // e.HomographNumber descending, + e.Id descending + select e; } } } diff --git a/backend/FwLite/LcmCrdt/FullTextSearch/EntrySearchService.cs b/backend/FwLite/LcmCrdt/FullTextSearch/EntrySearchService.cs index 64d5bdaa89..3854f46823 100644 --- a/backend/FwLite/LcmCrdt/FullTextSearch/EntrySearchService.cs +++ b/backend/FwLite/LcmCrdt/FullTextSearch/EntrySearchService.cs @@ -29,9 +29,9 @@ public class EntrySearchService(LcmCrdtDbContext dbContext, ILogger EntrySearchRecordsTable => dbContext.GetTable(); - public IQueryable Filter(IQueryable queryable, string query) + public IQueryable Filter(IQueryable queryable, string query, WritingSystemId wsId, MorphType[] morphTypes) { - return FilterInternal(queryable, query).Select(t => t.Entry); + return FilterInternal(queryable, query, wsId, morphTypes).Select(t => t.Entry); } /// @@ -42,39 +42,85 @@ public IQueryable Filter(IQueryable queryable, string query) /// public IQueryable FilterAndRank(IQueryable queryable, string query, - WritingSystemId wsId) + WritingSystemId wsId, + MorphType[] morphTypes) { - var filtered = FilterInternal(queryable, query); + var morphTypeTable = dbContext.GetTable(); + var filtered = FilterInternal(queryable, query, wsId, morphTypes); var ordered = filtered - .OrderByDescending(t => t.HeadwordMatches) + .OrderByDescending(t => t.HeadwordMatches ? 0 : Sql.Ext.SQLite().Rank(t.SearchRecord)) .ThenByDescending(t => t.HeadwordPrefixMatches) - .ThenBy(t => t.HeadwordMatches ? t.SearchRecord.Headword.Length : int.MaxValue) - .ThenBy(t => - t.HeadwordMatches - ? t.SearchRecord.Headword.CollateUnicode(wsId) - : string.Empty) - .ThenBy(t => Sql.Ext.SQLite().Rank(t.SearchRecord)).ThenBy(t => t.Entry.Id); + .ThenBy(t => t.Headword.Length) + .ThenBy(t => t.Headword.CollateUnicode(wsId)) + .ThenBy(t => t.HeadwordMatches + ? 
morphTypeTable.Where(mt => mt.Kind == t.Entry.MorphType || mt.Kind == MorphTypeKind.Stem) + .OrderBy(mt => mt.Kind == MorphTypeKind.Stem ? 1 : 0) // stem is the fallback, so it should come last + .Select(mt => mt.SecondaryOrder).FirstOrDefault() + : int.MaxValue) + // .ThenBy(t => t.Entry.HomographNumber) + .ThenBy(t => t.Entry.Id); return ordered.Select(t => t.Entry); } - private sealed record FilterProjection(Entry Entry, EntrySearchRecord SearchRecord, bool HeadwordMatches, bool HeadwordPrefixMatches); + private sealed record FilterProjection(Entry Entry, EntrySearchRecord SearchRecord, string Headword, bool HeadwordMatches, bool HeadwordPrefixMatches); - private IQueryable FilterInternal(IQueryable queryable, string query) + private IQueryable FilterInternal(IQueryable queryable, string query, WritingSystemId wsId, MorphType[] morphTypes) { var ftsString = ToFts5LiteralString(query); + var queryWithoutMorphTokens = StripMorphTokens(query, morphTypes); - //starting from EntrySearchRecordsTable rather than queryable otherwise linq2db loses track of the table return from searchRecord in EntrySearchRecordsTable from entry in queryable.InnerJoin(r => r.Id == searchRecord.Id) where Sql.Ext.SQLite().Match(searchRecord, ftsString) && - (entry.LexemeForm.SearchValue(query) + (entry.LexemeForm.SearchValue(queryWithoutMorphTokens) || entry.CitationForm.SearchValue(query) - || entry.Senses.Any(s => s.Gloss.SearchValue(query))) - let headwordMatches = SqlHelpers.ContainsIgnoreCaseAccents(searchRecord.Headword, query) - let headwordPrefixMatches = SqlHelpers.StartsWithIgnoreCaseAccents(searchRecord.Headword, query) - select new FilterProjection(entry, searchRecord, headwordMatches, headwordPrefixMatches); + || entry.Senses.Any(s => s.Gloss.SearchValue(query)) + || SqlHelpers.ContainsIgnoreCaseAccents(entry.Headword(wsId), query)) + // this does not include morph tokens, which is actually what we want. Morph-tokens should not affect sorting. 
+            // If the user uses a citation form with morph tokens, then oh well. Not even FLEx strips the morph-tokens before sorting in that case.
+            let headword = entry.Headword(wsId)
+            let headwordQuery = string.IsNullOrEmpty(Json.Value(entry.CitationForm, ms => ms[wsId]))
+                ? queryWithoutMorphTokens : query
+            let headwordMatches = SqlHelpers.ContainsIgnoreCaseAccents(headword, headwordQuery)
+            let headwordPrefixMatches = SqlHelpers.StartsWithIgnoreCaseAccents(headword, headwordQuery)
+            select new FilterProjection(entry, searchRecord, headword, headwordMatches, headwordPrefixMatches);
+    }
+
+    /// <summary>
+    /// Removes the leading and/or trailing morph token of the best-matching morph type from
+    /// <paramref name="input"/>, so the remainder can be compared against undecorated lexeme forms.
+    /// </summary>
+    /// <remarks>
+    /// Each morph type is scored: 2 for a matching <c>Prefix</c>, 1 for a matching <c>Postfix</c>.
+    /// The highest score wins; on a tie the first morph type in <paramref name="morphTypes"/> is kept.
+    /// </remarks>
+    /// <param name="input">The raw search query; returned unchanged when null or empty.</param>
+    /// <param name="morphTypes">Candidate morph types whose <c>Prefix</c>/<c>Postfix</c> tokens are tried.</param>
+    /// <returns>
+    /// <paramref name="input"/> without the matched tokens (empty when the query consists only of tokens),
+    /// or <paramref name="input"/> itself when no morph type matches.
+    /// </returns>
+    private static string StripMorphTokens(string input, MorphType[] morphTypes)
+    {
+        if (string.IsNullOrEmpty(input)) return input;
+
+        var bestMatchScore = 0;
+        var bestPrefixLength = 0;
+        var bestPostfixLength = 0;
+
+        foreach (var morphType in morphTypes)
+        {
+            // Ordinal matching: the tokens are literal markers and the matched lengths are used for slicing below.
+            // Empty tokens are ignored; they would otherwise "match" every query and could shadow a real match.
+            var prefixLength = !string.IsNullOrEmpty(morphType.Prefix)
+                               && input.StartsWith(morphType.Prefix, StringComparison.Ordinal)
+                ? morphType.Prefix.Length
+                : 0;
+
+            // The trailing token must not overlap the characters already claimed by the leading token
+            // (e.g. a query of "-" against a type whose Prefix and Postfix are both "-");
+            // stripping both would slice past the start of the string and throw.
+            var postfixLength = !string.IsNullOrEmpty(morphType.Postfix)
+                                && input.Length - prefixLength >= morphType.Postfix.Length
+                                && input.EndsWith(morphType.Postfix, StringComparison.Ordinal)
+                ? morphType.Postfix.Length
+                : 0;
+
+            var currMatchScore = (prefixLength > 0 ? 2 : 0) // prefer leading tokens
+                                 + (postfixLength > 0 ? 1 : 0);
+
+            if (currMatchScore > bestMatchScore)
+            {
+                bestMatchScore = currMatchScore;
+                bestPrefixLength = prefixLength;
+                bestPostfixLength = postfixLength;
+            }
+        }
+
+        if (bestMatchScore == 0) return input;
+
+        // Safe: bestPrefixLength + bestPostfixLength <= input.Length is guaranteed by the overlap check above.
+        return input[bestPrefixLength..^bestPostfixLength];
+    }
 
     private static string ToFts5LiteralString(string query)
@@ -166,7 +212,8 @@ public async Task UpdateEntrySearchTable(Guid entryId)
     public async Task UpdateEntrySearchTable(Entry entry)
     {
         var writingSystems = await dbContext.WritingSystemsOrdered.ToArrayAsync();
-        var record = ToEntrySearchRecord(entry, writingSystems);
+        var morphTypeDataLookup = await 
dbContext.MorphTypes.ToDictionaryAsync(m => m.Kind); + var record = ToEntrySearchRecord(entry, writingSystems, morphTypeDataLookup); await InsertOrUpdateEntrySearchRecord(record, EntrySearchRecordsTable); } @@ -214,7 +261,8 @@ public static async Task UpdateEntrySearchTable(IEnumerable entries, return ws1.Id.CompareTo(ws2.Id); }); var entrySearchRecordsTable = dbContext.GetTable(); - var searchRecords = entries.Select(entry => ToEntrySearchRecord(entry, writingSystems)); + var morphTypeDataLookup = await dbContext.MorphTypes.ToDictionaryAsync(m => m.Kind); + var searchRecords = entries.Select(entry => ToEntrySearchRecord(entry, writingSystems, morphTypeDataLookup)); foreach (var entrySearchRecord in searchRecords) { //can't use bulk copy here because that creates duplicate rows @@ -232,11 +280,12 @@ public async Task RegenerateEntrySearchTable() await EntrySearchRecordsTable.TruncateAsync(); var writingSystems = await dbContext.WritingSystemsOrdered.ToArrayAsync(); + var morphTypeDataLookup = await dbContext.MorphTypes.ToDictionaryAsync(m => m.Kind); await EntrySearchRecordsTable .BulkCopyAsync(dbContext.Set() .LoadWith(e => e.Senses) .AsQueryable() - .Select(entry => ToEntrySearchRecord(entry, writingSystems)) + .Select(entry => ToEntrySearchRecord(entry, writingSystems, morphTypeDataLookup)) .AsAsyncEnumerable()); await transaction.CommitAsync(); } @@ -256,12 +305,21 @@ private async Task HasMissingEntries() return await EntrySearchRecordsTable.CountAsync() != await dbContext.Set().CountAsync(); } - private static EntrySearchRecord ToEntrySearchRecord(Entry entry, WritingSystem[] writingSystems) + private static EntrySearchRecord ToEntrySearchRecord(Entry entry, WritingSystem[] writingSystems, + IReadOnlyDictionary morphTypeDataLookup) { + // Include headwords (with morph tokens) for ALL vernacular writing systems (space-separated). + // This ensures FTS matches across all WS, including morph-token-decorated forms. 
+ var headwords = EntryQueryHelpers.ComputeHeadwords(entry, morphTypeDataLookup); + var headword = string.Join(" ", + writingSystems.Where(ws => ws.Type == WritingSystemType.Vernacular) + .Select(ws => headwords[ws.WsId]) + .Where(h => !string.IsNullOrEmpty(h))); + return new EntrySearchRecord() { Id = entry.Id, - Headword = entry.Headword(writingSystems.First(ws => ws.Type == WritingSystemType.Vernacular).WsId), + Headword = headword, LexemeForm = LexemeForm(writingSystems, entry), CitationForm = CitationForm(writingSystems, entry), Definition = Definition(writingSystems, entry), diff --git a/backend/FwLite/LcmCrdt/Json.cs b/backend/FwLite/LcmCrdt/Json.cs index ad9bebf943..71dfe76294 100644 --- a/backend/FwLite/LcmCrdt/Json.cs +++ b/backend/FwLite/LcmCrdt/Json.cs @@ -164,6 +164,15 @@ private static Expression>> QueryExpression return (values) => values.QueryInternal().Select(v => v.Value); } + [ExpressionMethod(nameof(QueryEntriesExpressionMultiString))] + internal static IQueryable> QueryEntries(MultiString values) + { + return values.Values.Select(kv => new JsonEach(kv.Value, kv.Key.Code, "", 0, "", "")).AsQueryable(); + } + + private static Expression>>> QueryEntriesExpressionMultiString() => + (values) => values.QueryInternal(); + //indicates that linq2db should rewrite Sense.SemanticDomains.Query(d => d.Code) //into code in QueryExpression: Sense.SemanticDomains.QueryInternal().Select(v => Sql.Value(v.Value, d => d.Code)) [ExpressionMethod(nameof(QuerySelectExpression))] @@ -204,7 +213,7 @@ public static string ToString(Guid? 
guid) } //maps to a row from json_each - private record JsonEach( + internal record JsonEach( [property: Column("value")] T Value, [property: Column("key")] string Key, [property: Column("type")] string Type, diff --git a/backend/FwLite/LcmCrdt/Migrations/20260318120000_RegenerateSearchTableForMorphTypes.Designer.cs b/backend/FwLite/LcmCrdt/Migrations/20260318120000_RegenerateSearchTableForMorphTypes.Designer.cs new file mode 100644 index 0000000000..d67a97f6e0 --- /dev/null +++ b/backend/FwLite/LcmCrdt/Migrations/20260318120000_RegenerateSearchTableForMorphTypes.Designer.cs @@ -0,0 +1,785 @@ +// +using System; +using System.Collections.Generic; +using LcmCrdt; +using Microsoft.EntityFrameworkCore; +using Microsoft.EntityFrameworkCore.Infrastructure; +using Microsoft.EntityFrameworkCore.Migrations; +using Microsoft.EntityFrameworkCore.Storage.ValueConversion; + +#nullable disable + +namespace LcmCrdt.Migrations +{ + [DbContext(typeof(LcmCrdtDbContext))] + [Migration("20260318120000_RegenerateSearchTableForMorphTypes")] + partial class RegenerateSearchTableForMorphTypes + { + /// + protected override void BuildTargetModel(ModelBuilder modelBuilder) + { +#pragma warning disable 612, 618 + modelBuilder.HasAnnotation("ProductVersion", "9.0.6"); + + modelBuilder.Entity("LcmCrdt.FullTextSearch.EntrySearchRecord", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("CitationForm") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("Definition") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("Gloss") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("Headword") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("LexemeForm") + .IsRequired() + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.ToTable("EntrySearchRecord", null, t => + { + t.ExcludeFromMigrations(); + }); + }); + + modelBuilder.Entity("LcmCrdt.ProjectData", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + 
b.Property("ClientId") + .HasColumnType("TEXT"); + + b.Property("Code") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("FwProjectId") + .HasColumnType("TEXT"); + + b.Property("LastUserId") + .HasColumnType("TEXT"); + + b.Property("LastUserName") + .HasColumnType("TEXT"); + + b.Property("Name") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("OriginDomain") + .HasColumnType("TEXT"); + + b.Property("Role") + .IsRequired() + .ValueGeneratedOnAdd() + .HasColumnType("TEXT") + .HasDefaultValue("Editor"); + + b.HasKey("Id"); + + b.ToTable("ProjectData"); + }); + + modelBuilder.Entity("MiniLcm.Models.ComplexFormComponent", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("ComplexFormEntryId") + .HasColumnType("TEXT"); + + b.Property("ComplexFormHeadword") + .HasColumnType("TEXT"); + + b.Property("ComponentEntryId") + .HasColumnType("TEXT"); + + b.Property("ComponentHeadword") + .HasColumnType("TEXT"); + + b.Property("ComponentSenseId") + .HasColumnType("TEXT") + .HasColumnName("ComponentSenseId"); + + b.Property("DeletedAt") + .HasColumnType("TEXT"); + + b.Property("Order") + .HasColumnType("REAL"); + + b.Property("SnapshotId") + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.HasIndex("ComponentEntryId"); + + b.HasIndex("ComponentSenseId"); + + b.HasIndex("SnapshotId") + .IsUnique(); + + b.HasIndex("ComplexFormEntryId", "ComponentEntryId") + .IsUnique() + .HasFilter("ComponentSenseId IS NULL"); + + b.HasIndex("ComplexFormEntryId", "ComponentEntryId", "ComponentSenseId") + .IsUnique() + .HasFilter("ComponentSenseId IS NOT NULL"); + + b.ToTable("ComplexFormComponents", (string)null); + }); + + modelBuilder.Entity("MiniLcm.Models.ComplexFormType", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("DeletedAt") + .HasColumnType("TEXT"); + + b.Property("Name") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("SnapshotId") + .HasColumnType("TEXT"); + + 
b.HasKey("Id"); + + b.HasIndex("SnapshotId") + .IsUnique(); + + b.ToTable("ComplexFormType"); + }); + + modelBuilder.Entity("MiniLcm.Models.Entry", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("CitationForm") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("ComplexFormTypes") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("DeletedAt") + .HasColumnType("TEXT"); + + b.Property("LexemeForm") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("LiteralMeaning") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("MorphType") + .HasColumnType("INTEGER"); + + b.Property("Note") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("PublishIn") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("SnapshotId") + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.HasIndex("SnapshotId") + .IsUnique(); + + b.ToTable("Entry"); + }); + + modelBuilder.Entity("MiniLcm.Models.ExampleSentence", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("DeletedAt") + .HasColumnType("TEXT"); + + b.Property("Order") + .HasColumnType("REAL"); + + b.Property("Reference") + .HasColumnType("jsonb"); + + b.Property("SenseId") + .HasColumnType("TEXT"); + + b.Property("Sentence") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("SnapshotId") + .HasColumnType("TEXT"); + + b.Property("Translations") + .IsRequired() + .HasColumnType("jsonb"); + + b.HasKey("Id"); + + b.HasIndex("SenseId"); + + b.HasIndex("SnapshotId") + .IsUnique(); + + b.ToTable("ExampleSentence"); + }); + + modelBuilder.Entity("MiniLcm.Models.MorphType", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("Abbreviation") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("DeletedAt") + .HasColumnType("TEXT"); + + b.Property("Description") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("Kind") + .HasColumnType("INTEGER"); + + 
b.Property("Name") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("Postfix") + .HasColumnType("TEXT"); + + b.Property("Prefix") + .HasColumnType("TEXT"); + + b.Property("SecondaryOrder") + .HasColumnType("INTEGER"); + + b.Property("SnapshotId") + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.HasIndex("Kind") + .IsUnique(); + + b.HasIndex("SnapshotId") + .IsUnique(); + + b.ToTable("MorphType"); + }); + + modelBuilder.Entity("MiniLcm.Models.PartOfSpeech", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("DeletedAt") + .HasColumnType("TEXT"); + + b.Property("Name") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("Predefined") + .HasColumnType("INTEGER"); + + b.Property("SnapshotId") + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.HasIndex("SnapshotId") + .IsUnique(); + + b.ToTable("PartOfSpeech"); + }); + + modelBuilder.Entity("MiniLcm.Models.Publication", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("DeletedAt") + .HasColumnType("TEXT"); + + b.Property("Name") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("SnapshotId") + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.HasIndex("SnapshotId") + .IsUnique(); + + b.ToTable("Publication"); + }); + + modelBuilder.Entity("MiniLcm.Models.SemanticDomain", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("Code") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("DeletedAt") + .HasColumnType("TEXT"); + + b.Property("Name") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("Predefined") + .HasColumnType("INTEGER"); + + b.Property("SnapshotId") + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.HasIndex("SnapshotId") + .IsUnique(); + + b.ToTable("SemanticDomain"); + }); + + modelBuilder.Entity("MiniLcm.Models.Sense", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("Definition") + 
.IsRequired() + .HasColumnType("jsonb"); + + b.Property("DeletedAt") + .HasColumnType("TEXT"); + + b.Property("EntryId") + .HasColumnType("TEXT"); + + b.Property("Gloss") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("Order") + .HasColumnType("REAL"); + + b.Property("PartOfSpeechId") + .HasColumnType("TEXT"); + + b.Property("SemanticDomains") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("SnapshotId") + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.HasIndex("EntryId"); + + b.HasIndex("PartOfSpeechId"); + + b.HasIndex("SnapshotId") + .IsUnique(); + + b.ToTable("Sense"); + }); + + modelBuilder.Entity("MiniLcm.Models.WritingSystem", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("Abbreviation") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("DeletedAt") + .HasColumnType("TEXT"); + + b.Property("Exemplars") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("Font") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("Name") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("Order") + .HasColumnType("REAL"); + + b.Property("SnapshotId") + .HasColumnType("TEXT"); + + b.Property("Type") + .HasColumnType("INTEGER"); + + b.Property("WsId") + .IsRequired() + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.HasIndex("SnapshotId") + .IsUnique(); + + b.HasIndex("WsId", "Type") + .IsUnique(); + + b.ToTable("WritingSystem"); + }); + + modelBuilder.Entity("SIL.Harmony.Commit", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("ClientId") + .HasColumnType("TEXT"); + + b.Property("Hash") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("Metadata") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("ParentHash") + .IsRequired() + .HasColumnType("TEXT"); + + b.ComplexProperty>("HybridDateTime", "SIL.Harmony.Commit.HybridDateTime#HybridDateTime", b1 => + { + b1.IsRequired(); + + b1.Property("Counter") + 
.HasColumnType("INTEGER") + .HasColumnName("Counter"); + + b1.Property("DateTime") + .HasColumnType("TEXT") + .HasColumnName("DateTime"); + }); + + b.HasKey("Id"); + + b.ToTable("Commits", (string)null); + }); + + modelBuilder.Entity("SIL.Harmony.Core.ChangeEntity", b => + { + b.Property("CommitId") + .HasColumnType("TEXT"); + + b.Property("Index") + .HasColumnType("INTEGER"); + + b.Property("Change") + .HasColumnType("jsonb"); + + b.Property("EntityId") + .HasColumnType("TEXT"); + + b.HasKey("CommitId", "Index"); + + b.ToTable("ChangeEntities", (string)null); + }); + + modelBuilder.Entity("SIL.Harmony.Db.ObjectSnapshot", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("CommitId") + .HasColumnType("TEXT"); + + b.Property("Entity") + .IsRequired() + .HasColumnType("jsonb"); + + b.Property("EntityId") + .HasColumnType("TEXT"); + + b.Property("EntityIsDeleted") + .HasColumnType("INTEGER"); + + b.Property("IsRoot") + .HasColumnType("INTEGER"); + + b.PrimitiveCollection("References") + .IsRequired() + .HasColumnType("TEXT"); + + b.Property("TypeName") + .IsRequired() + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.HasIndex("EntityId"); + + b.HasIndex("CommitId", "EntityId") + .IsUnique(); + + b.ToTable("Snapshots", (string)null); + }); + + modelBuilder.Entity("SIL.Harmony.Resource.LocalResource", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("LocalPath") + .IsRequired() + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.ToTable("LocalResource"); + }); + + modelBuilder.Entity("SIL.Harmony.Resource.RemoteResource", b => + { + b.Property("Id") + .ValueGeneratedOnAdd() + .HasColumnType("TEXT"); + + b.Property("DeletedAt") + .HasColumnType("TEXT"); + + b.Property("RemoteId") + .HasColumnType("TEXT"); + + b.Property("SnapshotId") + .HasColumnType("TEXT"); + + b.HasKey("Id"); + + b.HasIndex("SnapshotId") + .IsUnique(); + + b.ToTable("RemoteResource"); + }); + + 
modelBuilder.Entity("MiniLcm.Models.ComplexFormComponent", b => + { + b.HasOne("MiniLcm.Models.Entry", null) + .WithMany("Components") + .HasForeignKey("ComplexFormEntryId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("MiniLcm.Models.Entry", null) + .WithMany("ComplexForms") + .HasForeignKey("ComponentEntryId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("MiniLcm.Models.Sense", null) + .WithMany() + .HasForeignKey("ComponentSenseId") + .OnDelete(DeleteBehavior.Cascade); + + b.HasOne("SIL.Harmony.Db.ObjectSnapshot", null) + .WithOne() + .HasForeignKey("MiniLcm.Models.ComplexFormComponent", "SnapshotId") + .OnDelete(DeleteBehavior.SetNull); + }); + + modelBuilder.Entity("MiniLcm.Models.ComplexFormType", b => + { + b.HasOne("SIL.Harmony.Db.ObjectSnapshot", null) + .WithOne() + .HasForeignKey("MiniLcm.Models.ComplexFormType", "SnapshotId") + .OnDelete(DeleteBehavior.SetNull); + }); + + modelBuilder.Entity("MiniLcm.Models.Entry", b => + { + b.HasOne("SIL.Harmony.Db.ObjectSnapshot", null) + .WithOne() + .HasForeignKey("MiniLcm.Models.Entry", "SnapshotId") + .OnDelete(DeleteBehavior.SetNull); + }); + + modelBuilder.Entity("MiniLcm.Models.ExampleSentence", b => + { + b.HasOne("MiniLcm.Models.Sense", null) + .WithMany("ExampleSentences") + .HasForeignKey("SenseId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("SIL.Harmony.Db.ObjectSnapshot", null) + .WithOne() + .HasForeignKey("MiniLcm.Models.ExampleSentence", "SnapshotId") + .OnDelete(DeleteBehavior.SetNull); + }); + + modelBuilder.Entity("MiniLcm.Models.MorphType", b => + { + b.HasOne("SIL.Harmony.Db.ObjectSnapshot", null) + .WithOne() + .HasForeignKey("MiniLcm.Models.MorphType", "SnapshotId") + .OnDelete(DeleteBehavior.SetNull); + }); + + modelBuilder.Entity("MiniLcm.Models.PartOfSpeech", b => + { + b.HasOne("SIL.Harmony.Db.ObjectSnapshot", null) + .WithOne() + .HasForeignKey("MiniLcm.Models.PartOfSpeech", "SnapshotId") + .OnDelete(DeleteBehavior.SetNull); 
+ }); + + modelBuilder.Entity("MiniLcm.Models.Publication", b => + { + b.HasOne("SIL.Harmony.Db.ObjectSnapshot", null) + .WithOne() + .HasForeignKey("MiniLcm.Models.Publication", "SnapshotId") + .OnDelete(DeleteBehavior.SetNull); + }); + + modelBuilder.Entity("MiniLcm.Models.SemanticDomain", b => + { + b.HasOne("SIL.Harmony.Db.ObjectSnapshot", null) + .WithOne() + .HasForeignKey("MiniLcm.Models.SemanticDomain", "SnapshotId") + .OnDelete(DeleteBehavior.SetNull); + }); + + modelBuilder.Entity("MiniLcm.Models.Sense", b => + { + b.HasOne("MiniLcm.Models.Entry", null) + .WithMany("Senses") + .HasForeignKey("EntryId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.HasOne("MiniLcm.Models.PartOfSpeech", "PartOfSpeech") + .WithMany() + .HasForeignKey("PartOfSpeechId") + .OnDelete(DeleteBehavior.SetNull); + + b.HasOne("SIL.Harmony.Db.ObjectSnapshot", null) + .WithOne() + .HasForeignKey("MiniLcm.Models.Sense", "SnapshotId") + .OnDelete(DeleteBehavior.SetNull); + + b.Navigation("PartOfSpeech"); + }); + + modelBuilder.Entity("MiniLcm.Models.WritingSystem", b => + { + b.HasOne("SIL.Harmony.Db.ObjectSnapshot", null) + .WithOne() + .HasForeignKey("MiniLcm.Models.WritingSystem", "SnapshotId") + .OnDelete(DeleteBehavior.SetNull); + }); + + modelBuilder.Entity("SIL.Harmony.Core.ChangeEntity", b => + { + b.HasOne("SIL.Harmony.Commit", null) + .WithMany("ChangeEntities") + .HasForeignKey("CommitId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + }); + + modelBuilder.Entity("SIL.Harmony.Db.ObjectSnapshot", b => + { + b.HasOne("SIL.Harmony.Commit", "Commit") + .WithMany("Snapshots") + .HasForeignKey("CommitId") + .OnDelete(DeleteBehavior.Cascade) + .IsRequired(); + + b.Navigation("Commit"); + }); + + modelBuilder.Entity("SIL.Harmony.Resource.RemoteResource", b => + { + b.HasOne("SIL.Harmony.Db.ObjectSnapshot", null) + .WithOne() + .HasForeignKey("SIL.Harmony.Resource.RemoteResource", "SnapshotId") + .OnDelete(DeleteBehavior.SetNull); + }); + + 
modelBuilder.Entity("MiniLcm.Models.Entry", b => + { + b.Navigation("ComplexForms"); + + b.Navigation("Components"); + + b.Navigation("Senses"); + }); + + modelBuilder.Entity("MiniLcm.Models.Sense", b => + { + b.Navigation("ExampleSentences"); + }); + + modelBuilder.Entity("SIL.Harmony.Commit", b => + { + b.Navigation("ChangeEntities"); + + b.Navigation("Snapshots"); + }); +#pragma warning restore 612, 618 + } + } +} diff --git a/backend/FwLite/LcmCrdt/Migrations/20260318120000_RegenerateSearchTableForMorphTypes.cs b/backend/FwLite/LcmCrdt/Migrations/20260318120000_RegenerateSearchTableForMorphTypes.cs new file mode 100644 index 0000000000..a6f9e1d491 --- /dev/null +++ b/backend/FwLite/LcmCrdt/Migrations/20260318120000_RegenerateSearchTableForMorphTypes.cs @@ -0,0 +1,23 @@ +using Microsoft.EntityFrameworkCore.Migrations; + +#nullable disable + +namespace LcmCrdt.Migrations +{ + /// + public partial class RegenerateSearchTableForMorphTypes : Migration + { + /// + protected override void Up(MigrationBuilder migrationBuilder) + { + // Force FTS rebuild so headwords include morph-type prefix/postfix tokens + migrationBuilder.Sql("DELETE FROM EntrySearchRecord;"); + } + + /// + protected override void Down(MigrationBuilder migrationBuilder) + { + // FTS table will be lazily regenerated + } + } +} diff --git a/backend/FwLite/LcmCrdt/Objects/PreDefinedData.cs b/backend/FwLite/LcmCrdt/Objects/PreDefinedData.cs index 75c5b72dcc..21cf537c9b 100644 --- a/backend/FwLite/LcmCrdt/Objects/PreDefinedData.cs +++ b/backend/FwLite/LcmCrdt/Objects/PreDefinedData.cs @@ -1,4 +1,5 @@ using LcmCrdt.Changes; +using MiniLcm.Models; using SIL.Harmony; namespace LcmCrdt.Objects; @@ -42,4 +43,11 @@ await dataModel.AddChanges(clientId, ], new Guid("023faebb-711b-4d2f-b34f-a15621fc66bb")); } + + internal static async Task PredefinedMorphTypes(DataModel dataModel, Guid clientId) + { + await dataModel.AddChanges(clientId, + CanonicalMorphTypes.All.Values.Select(mt => new 
CreateMorphTypeChange(mt)).ToArray(), + new Guid("a7b2c3d4-e5f6-4a8b-9c0d-1e2f3a4b5c6d")); + } } diff --git a/backend/FwLite/LcmCrdt/QueryHelpers.cs b/backend/FwLite/LcmCrdt/QueryHelpers.cs index 57753f4f31..37e151c271 100644 --- a/backend/FwLite/LcmCrdt/QueryHelpers.cs +++ b/backend/FwLite/LcmCrdt/QueryHelpers.cs @@ -4,7 +4,8 @@ namespace LcmCrdt; public static class QueryHelpers { - public static void Finalize(this Entry entry, IComparer complexFormComparer) + public static void Finalize(this Entry entry, + IComparer complexFormComparer) { entry.Senses.ApplySortOrder(); entry.Components.ApplySortOrder(); diff --git a/backend/FwLite/LcmCrdt/SqlHelpers.cs b/backend/FwLite/LcmCrdt/SqlHelpers.cs index d14a6b1876..4b2d4fe0d5 100644 --- a/backend/FwLite/LcmCrdt/SqlHelpers.cs +++ b/backend/FwLite/LcmCrdt/SqlHelpers.cs @@ -36,4 +36,7 @@ private static Expression> SearchValueExpression [Sql.Expression(CustomSqliteFunctionInterceptor.StartsWithFunction + "({0}, {1})")] public static bool StartsWithIgnoreCaseAccents(string s, string search) => s.StartsWithDiacriticMatch(search); + + [Sql.Expression("({0} || {1} || {2})", PreferServerSide = true)] + public static string ConcatTokens(string leading, string value, string trailing) => leading + value + trailing; } diff --git a/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs b/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs index 801235e33a..c8f9269540 100644 --- a/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs +++ b/backend/FwLite/MiniLcm.Tests/QueryEntryTestsBase.cs @@ -388,25 +388,25 @@ public async Task CanFilterToExampleSentenceWithMissingSentence() [Theory] [InlineData("a", "a", true)] - [InlineData("a", "A", false)] - [InlineData("A", "Ã", false)] - [InlineData("ap", "apple", false)] - [InlineData("ap", "APPLE", false)] - [InlineData("ing", "walking", false)] - [InlineData("ing", "WALKING", false)] - [InlineData("Ãp", "Ãpple", false)] - [InlineData("Ãp", "ãpple", false)] - [InlineData("ap", "Ãpple", false)] - 
[InlineData("app", "Ãpple", false)]//crdt fts only kicks in at 3 chars - [InlineData("й", "й", false)] // D, C - [InlineData("й", "й", false)] // C, D + [InlineData("a", "A")] + [InlineData("A", "Ã")] + [InlineData("ap", "apple")] + [InlineData("ap", "APPLE")] + [InlineData("ing", "walking")] + [InlineData("ing", "WALKING")] + [InlineData("Ãp", "Ãpple")] + [InlineData("Ãp", "ãpple")] + [InlineData("ap", "Ãpple")] + [InlineData("app", "Ãpple")]//crdt fts only kicks in at 3 chars + [InlineData("й", "й")] // D, C + [InlineData("й", "й")] // C, D [InlineData("й", "й", true)] // C, C [InlineData("й", "й", true)] // D, D - [InlineData("ймыл", "ймыл", false)] // D, C - [InlineData("ймыл", "ймыл", false)] // C, D + [InlineData("ймыл", "ймыл")] // D, C + [InlineData("ймыл", "ймыл")] // C, D [InlineData("ймыл", "ймыл", true)] // C, C [InlineData("ймыл", "ймыл", true)] // D, D - public async Task SuccessfulMatches(string searchTerm, string word, bool identical) + public async Task SuccessfulMatches(string searchTerm, string word, bool identical = false) { // identical is to make the test cases more readable when they only differ in their normalization (searchTerm == word).Should().Be(identical); @@ -515,6 +515,95 @@ public async Task PunctuationWorks(string searchTerm, string word) var results = await Api.SearchEntries(searchTerm).Select(e => e.LexemeForm["en"]).ToArrayAsync(); results.Should().Contain(word); } + + // This test guards against the mistake of only matching on headword + [Theory] + [InlineData("mango")] // FTS + [InlineData("m")] // non-FTS + public async Task SearchEntries_MatchesLexeme(string searchTerm) + { + var prefixQuery = $"{searchTerm}-"; + var lexemeOnlyMatchEntry = await Api.CreateEntry(new Entry + { + LexemeForm = { ["en"] = "mango" }, + CitationForm = { ["en"] = "zzzzzzzz" }, + MorphType = MorphTypeKind.Stem, + }); + var lexemeOnlyMatchWithMorphToken = await Api.CreateEntry(new Entry + { + LexemeForm = { ["en"] = "mango" }, + CitationForm = { ["en"] 
= "zzzzzzzz" }, + MorphType = MorphTypeKind.Prefix, + }); + var entries = await Api.SearchEntries(searchTerm).ToArrayAsync(); + entries.Should().Contain(e => e.Id == lexemeOnlyMatchEntry.Id); + entries.Should().Contain(e => e.Id == lexemeOnlyMatchWithMorphToken.Id); + } + + [Theory] + [InlineData("mango-")] // FTS + [InlineData("o-")] // non-FTS + public async Task SearchEntries_CitationFormOverridesMorphTokens(string searchTerm) + { + var prefixQuery = $"{searchTerm}-"; + var entryWithOverriddenMorphToken = await Api.CreateEntry(new Entry + { + LexemeForm = { ["en"] = "mango" }, + // citation form overrides "mango-" + CitationForm = { ["en"] = "zzzzzzzz" }, + MorphType = MorphTypeKind.Prefix, + }); + var entries = await Api.SearchEntries(searchTerm).ToArrayAsync(); + entries.Should().NotContain(e => e.Id == entryWithOverriddenMorphToken.Id); + } + + [Theory] + [InlineData("mango-")] // FTS + [InlineData("o-")] // non-FTS + public async Task MorphTokenSearch_FindsPrefixEntry(string searchTerm) + { + var id = Guid.NewGuid(); + await Api.CreateEntry(new Entry { Id = id, LexemeForm = { ["en"] = "mango" }, MorphType = MorphTypeKind.Prefix }); + + var results = await Api.SearchEntries(searchTerm).ToArrayAsync(); + results.Should().Contain(e => e.Id == id); + } + + [Theory] + [InlineData("-mango")] // FTS + [InlineData("-m")] // non-FTS + public async Task MorphTokenSearch_FindsSuffixEntry(string searchTerm) + { + var id = Guid.NewGuid(); + await Api.CreateEntry(new Entry { Id = id, LexemeForm = { ["en"] = "mango" }, MorphType = MorphTypeKind.Suffix }); + + var results = await Api.SearchEntries(searchTerm).ToArrayAsync(); + results.Should().Contain(e => e.Id == id); + } + + [Fact] + public async Task MorphTokenSearch_DoesNotMatchWithoutToken() + { + await Api.CreateEntry(new Entry { LexemeForm = { ["en"] = "mango" }, MorphType = MorphTypeKind.Root }); + + // Searching for "mango-" should NOT match a Root entry (no morph tokens) + var results = await 
Api.SearchEntries("mango-").Select(e => e.LexemeForm["en"]).ToArrayAsync(); + results.Should().NotContain("mango"); + } + + [Theory] + [InlineData("mango", SortField.Headword)] // FTS + [InlineData("m", SortField.Headword)] // non-FTS + [InlineData("mango", SortField.SearchRelevance)] // FTS + [InlineData("m", SortField.SearchRelevance)] // non-FTS + public async Task SearchEntries_EntryWithNoMorphTypeData_Works(string searchTerm, SortField sortField) + { + // MorphType.Unknown will likely not be included in the morph-type DB-table + var id = Guid.NewGuid(); + await Api.CreateEntry(new Entry { Id = id, LexemeForm = { ["en"] = "mango" }, MorphType = MorphTypeKind.Unknown }); + var results = await Api.SearchEntries(searchTerm, new(new(sortField))).ToArrayAsync(); + results.Should().Contain(e => e.Id == id); + } } // A seperate class to preserve the readability of the results in the main test class diff --git a/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs b/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs index 262d6b7ea4..e4d19fbff2 100644 --- a/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs +++ b/backend/FwLite/MiniLcm.Tests/SortingTestsBase.cs @@ -1,19 +1,24 @@ +using MiniLcm.Tests.AutoFakerHelpers; +using Soenneker.Utils.AutoBogus; + namespace MiniLcm.Tests; public abstract class SortingTestsBase : MiniLcmTestBase { + protected static readonly AutoFaker Faker = new(AutoFakerDefault.Config); + private Task CreateEntry(string headword) { return Api.CreateEntry(new() { LexemeForm = { { "en", headword } }, }); } // ReSharper disable InconsistentNaming - const string Ru_A= "\u0410"; - const string Ru_a = "\u0430"; - const string Ru_Б= "\u0411"; - const string Ru_б = "\u0431"; - const string Ru_В= "\u0412"; - const string Ru_в = "\u0432"; + private const string Ru_A = "\u0410"; + private const string Ru_a = "\u0430"; + private const string Ru_Б = "\u0411"; + private const string Ru_б = "\u0431"; + private const string Ru_В = "\u0412"; + private const string Ru_в = 
"\u0432"; // ReSharper restore InconsistentNaming [Theory] @@ -49,4 +54,221 @@ await Api.CreateWritingSystem(new() await Api.GetEntries(new QueryOptions(new SortOptions(SortField.Headword, wsId))) .ToArrayAsync(); } + + [Theory] + [InlineData("aaaa", SortField.Headword)] // FTS + [InlineData("a", SortField.Headword)] // non-FTS + [InlineData("aaaa", SortField.SearchRelevance)] // FTS + [InlineData("a", SortField.SearchRelevance)] // non-FTS + public async Task MorphTokens_DoNotAffectSortOrder(string query, SortField sortField) + { + // All three entries have LexemeForm "aaaa". Their headwords are: + // Root: "aaaa" (no tokens) + // Prefix: "-aaaa" (leading token "-") + // Suffix: "aaaa-" (trailing token "-") + // Sort order should ignore morph tokens and differentiate only by SecondaryOrder. + Entry[] expected = [ + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Root }, // SecondaryOrder = 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Prefix }, // SecondaryOrder = 3 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "aaaa" }, MorphType = MorphTypeKind.Suffix }, // SecondaryOrder = 6 + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(query, new(new(sortField))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } + + [Theory] + [InlineData("aaaa")] // FTS rank + [InlineData("a")] // non-FTS rank + public async Task SecondaryOrder_Relevance_LexemeForm(string searchTerm) + { + static Entry[] CreateSortedEntrySet(string headword) + { + return [ + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = headword }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, + // new() { Id = 
Guid.NewGuid(), LexemeForm = { ["en"] = lexeme }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = headword }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = lexeme }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + ]; + } + + var exactMatches = CreateSortedEntrySet("aaaa"); + var firstShortestStartsWithMatches = CreateSortedEntrySet("aaaab"); + var lastShortestStartsWithMatches = CreateSortedEntrySet("aaaac"); + var firstLongestStartsWithMatches = CreateSortedEntrySet("aaaabb"); + var lastLongestStartsWithMatches = CreateSortedEntrySet("aaaacc"); + var firstShortestContainsMatches = CreateSortedEntrySet("baaaa"); + var lastShortestContainsMatches = CreateSortedEntrySet("caaaa"); + var firstLongestContainsMatches = CreateSortedEntrySet("bbaaaa"); + var lastLongestContainsMatches = CreateSortedEntrySet("ccaaaa"); + + var entryId = Guid.NewGuid(); + Entry nonHeadwordMatch = new() { Id = entryId, Senses = [new() { EntryId = entryId, Gloss = { ["en"] = "aaaa" } }] }; + + Entry[] expected = [ + .. exactMatches, + .. firstShortestStartsWithMatches, + .. lastShortestStartsWithMatches, + .. firstLongestStartsWithMatches, + .. lastLongestStartsWithMatches, + .. firstShortestContainsMatches, + .. lastShortestContainsMatches, + .. firstLongestContainsMatches, + .. 
lastLongestContainsMatches, + nonHeadwordMatch, + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(searchTerm, new(new(SortField.SearchRelevance))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options); + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } + + [Theory] + [InlineData("aaaa")] // FTS rank + [InlineData("a")] // non-FTS rank + public async Task SecondaryOrder_Relevance_CitationForm(string searchTerm) + { + static Entry[] CreateSortedEntrySet(string headword) + { + return [ + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = headword }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = headword }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = headword }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = headword }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + ]; + } + + var exactMatches = CreateSortedEntrySet("aaaa"); + var firstShortestStartsWithMatches = CreateSortedEntrySet("aaaab"); + var lastShortestStartsWithMatches = CreateSortedEntrySet("aaaac"); + var firstLongestStartsWithMatches = CreateSortedEntrySet("aaaabb"); + var lastLongestStartsWithMatches = CreateSortedEntrySet("aaaacc"); + var firstShortestContainsMatches = CreateSortedEntrySet("baaaa"); + var lastShortestContainsMatches = CreateSortedEntrySet("caaaa"); + var firstLongestContainsMatches = 
CreateSortedEntrySet("bbaaaa"); + var lastLongestContainsMatches = CreateSortedEntrySet("ccaaaa"); + + var entryId = Guid.NewGuid(); + Entry nonHeadwordMatch = new() { Id = entryId, Senses = [new() { EntryId = entryId, Gloss = { ["en"] = "aaaa" } }] }; + + Entry[] expected = [ + .. exactMatches, + .. firstShortestStartsWithMatches, + .. lastShortestStartsWithMatches, + .. firstLongestStartsWithMatches, + .. lastLongestStartsWithMatches, + .. firstShortestContainsMatches, + .. lastShortestContainsMatches, + .. firstLongestContainsMatches, + .. lastLongestContainsMatches, + nonHeadwordMatch, + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(searchTerm, new(new(SortField.SearchRelevance))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options); + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } + + [Theory] + [InlineData("baaa")] // FTS rank + [InlineData("b")] // non-FTS rank + public async Task SecondaryOrder_Headword_LexemeForm(string searchTerm) + { + Entry[] expected = [ + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "abaaa" }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "abaaa" }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "abaaa" }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "abaaa" }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "baaa" }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, 
+ // new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "baaa" }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "baaa" }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), LexemeForm = { ["en"] = "baaa" }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(searchTerm, new(new(SortField.Headword))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options); + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } + + [Theory] + [InlineData("baaa")] // FTS rank + [InlineData("b")] // non-FTS rank + public async Task SecondaryOrder_Headword_CitationForm(string searchTerm) + { + Entry[] expected = [ + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "abaaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "abaaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "abaaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "abaaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + // Root/Stem - SecondaryOrder: 1 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "baaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Root/*, HomographNumber = 1*/ }, + // new() 
{ Id = Guid.NewGuid(), CitationForm = { ["en"] = "baaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.Stem, HomographNumber = 2 }, + // BoundRoot/BoundStem - SecondaryOrder: 2 + new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "baaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundRoot/*, HomographNumber = 1*/ }, + // new() { Id = Guid.NewGuid(), CitationForm = { ["en"] = "baaa" }, LexemeForm = { ["en"] = "❌" }, MorphType = MorphTypeKind.BoundStem, HomographNumber = 2 }, + ]; + + var ids = expected.Select(e => e.Id).ToHashSet(); + + foreach (var entry in Faker.Faker.Random.Shuffle(expected)) + await Api.CreateEntry(entry); + + var results = (await Api.SearchEntries(searchTerm, new(new(SortField.Headword))).ToArrayAsync()) + .Where(e => ids.Contains(e.Id)) + .ToList(); + + results.Should().BeEquivalentTo(expected, + options => options); + results.Should().BeEquivalentTo(expected, + options => options.WithStrictOrdering()); + } } diff --git a/backend/FwLite/MiniLcm/Models/CanonicalMorphTypes.cs b/backend/FwLite/MiniLcm/Models/CanonicalMorphTypes.cs new file mode 100644 index 0000000000..a14964452a --- /dev/null +++ b/backend/FwLite/MiniLcm/Models/CanonicalMorphTypes.cs @@ -0,0 +1,184 @@ +using System.Collections.Frozen; + +namespace MiniLcm.Models; + +/// +/// Canonical morph-type definitions matching FieldWorks/LibLCM MoMorphTypeTags. +/// GUIDs match SIL.LCModel constants (kguidMorph*). Data verified against Sena 3 FwData project. 
+/// +public static class CanonicalMorphTypes +{ + public static readonly FrozenDictionary All = CreateAll().ToFrozenDictionary(m => m.Kind); + + private static MorphType[] CreateAll() => + [ + new() + { + Id = new Guid("d7f713e4-e8cf-11d3-9764-00c04f186933"), + Kind = MorphTypeKind.BoundRoot, + Name = new MultiString { { "en", "bound root" } }, + Abbreviation = new MultiString { { "en", "bd root" } }, + Prefix = "*", + SecondaryOrder = 10, + }, + new() + { + Id = new Guid("d7f713e7-e8cf-11d3-9764-00c04f186933"), + Kind = MorphTypeKind.BoundStem, + Name = new MultiString { { "en", "bound stem" } }, + Abbreviation = new MultiString { { "en", "bd stem" } }, + Prefix = "*", + SecondaryOrder = 10, + }, + new() + { + Id = new Guid("d7f713df-e8cf-11d3-9764-00c04f186933"), + Kind = MorphTypeKind.Circumfix, + Name = new MultiString { { "en", "circumfix" } }, + Abbreviation = new MultiString { { "en", "cfx" } }, + SecondaryOrder = 0, + }, + new() + { + Id = new Guid("c2d140e5-7ca9-41f4-a69a-22fc7049dd2c"), + Kind = MorphTypeKind.Clitic, + Name = new MultiString { { "en", "clitic" } }, + Abbreviation = new MultiString { { "en", "clit" } }, + SecondaryOrder = 0, + }, + new() + { + Id = new Guid("d7f713e1-e8cf-11d3-9764-00c04f186933"), + Kind = MorphTypeKind.Enclitic, + Name = new MultiString { { "en", "enclitic" } }, + Abbreviation = new MultiString { { "en", "enclit" } }, + Prefix = "=", + SecondaryOrder = 80, + }, + new() + { + Id = new Guid("d7f713da-e8cf-11d3-9764-00c04f186933"), + Kind = MorphTypeKind.Infix, + Name = new MultiString { { "en", "infix" } }, + Abbreviation = new MultiString { { "en", "ifx" } }, + Prefix = "-", + Postfix = "-", + SecondaryOrder = 40, + }, + new() + { + Id = new Guid("56db04bf-3d58-44cc-b292-4c8aa68538f4"), + Kind = MorphTypeKind.Particle, + Name = new MultiString { { "en", "particle" } }, + Abbreviation = new MultiString { { "en", "part" } }, + SecondaryOrder = 0, + }, + new() + { + Id = new Guid("d7f713db-e8cf-11d3-9764-00c04f186933"), + 
Kind = MorphTypeKind.Prefix, + Name = new MultiString { { "en", "prefix" } }, + Abbreviation = new MultiString { { "en", "pfx" } }, + Postfix = "-", + SecondaryOrder = 20, + }, + new() + { + Id = new Guid("d7f713e2-e8cf-11d3-9764-00c04f186933"), + Kind = MorphTypeKind.Proclitic, + Name = new MultiString { { "en", "proclitic" } }, + Abbreviation = new MultiString { { "en", "proclit" } }, + Postfix = "=", + SecondaryOrder = 30, + }, + new() + { + Id = new Guid("d7f713e5-e8cf-11d3-9764-00c04f186933"), + Kind = MorphTypeKind.Root, + Name = new MultiString { { "en", "root" } }, + Abbreviation = new MultiString { { "en", "ubd root" } }, + SecondaryOrder = 0, + }, + new() + { + Id = new Guid("d7f713dc-e8cf-11d3-9764-00c04f186933"), + Kind = MorphTypeKind.Simulfix, + Name = new MultiString { { "en", "simulfix" } }, + Abbreviation = new MultiString { { "en", "smfx" } }, + Prefix = "=", + Postfix = "=", + SecondaryOrder = 60, + }, + new() + { + Id = new Guid("d7f713e8-e8cf-11d3-9764-00c04f186933"), + Kind = MorphTypeKind.Stem, + Name = new MultiString { { "en", "stem" } }, + Abbreviation = new MultiString { { "en", "ubd stem" } }, + SecondaryOrder = 0, + }, + new() + { + Id = new Guid("d7f713dd-e8cf-11d3-9764-00c04f186933"), + Kind = MorphTypeKind.Suffix, + Name = new MultiString { { "en", "suffix" } }, + Abbreviation = new MultiString { { "en", "sfx" } }, + Prefix = "-", + SecondaryOrder = 70, + }, + new() + { + Id = new Guid("d7f713de-e8cf-11d3-9764-00c04f186933"), + Kind = MorphTypeKind.Suprafix, + Name = new MultiString { { "en", "suprafix" } }, + Abbreviation = new MultiString { { "en", "spfx" } }, + Prefix = "~", + Postfix = "~", + SecondaryOrder = 50, + }, + new() + { + Id = new Guid("18d9b1c3-b5b6-4c07-b92c-2fe1d2281bd4"), + Kind = MorphTypeKind.InfixingInterfix, + Name = new MultiString { { "en", "infixing interfix" } }, + Abbreviation = new MultiString { { "en", "ifxnfx" } }, + Prefix = "-", + Postfix = "-", + SecondaryOrder = 0, + }, + new() + { + Id = new 
Guid("af6537b0-7175-4387-ba6a-36547d37fb13"), + Kind = MorphTypeKind.PrefixingInterfix, + Name = new MultiString { { "en", "prefixing interfix" } }, + Abbreviation = new MultiString { { "en", "pfxnfx" } }, + Postfix = "-", + SecondaryOrder = 0, + }, + new() + { + Id = new Guid("3433683d-08a9-4bae-ae53-2a7798f64068"), + Kind = MorphTypeKind.SuffixingInterfix, + Name = new MultiString { { "en", "suffixing interfix" } }, + Abbreviation = new MultiString { { "en", "sfxnfx" } }, + Prefix = "-", + SecondaryOrder = 0, + }, + new() + { + Id = new Guid("a23b6faa-1052-4f4d-984b-4b338bdaf95f"), + Kind = MorphTypeKind.Phrase, + Name = new MultiString { { "en", "phrase" } }, + Abbreviation = new MultiString { { "en", "phr" } }, + SecondaryOrder = 0, + }, + new() + { + Id = new Guid("0cc8c35a-cee9-434d-be58-5d29130fba5b"), + Kind = MorphTypeKind.DiscontiguousPhrase, + Name = new MultiString { { "en", "discontiguous phrase" } }, + Abbreviation = new MultiString { { "en", "dis phr" } }, + SecondaryOrder = 0, + }, + ]; +} diff --git a/backend/FwLite/MiniLcm/Models/Entry.cs b/backend/FwLite/MiniLcm/Models/Entry.cs index 575200aac8..dbf8ccbffa 100644 --- a/backend/FwLite/MiniLcm/Models/Entry.cs +++ b/backend/FwLite/MiniLcm/Models/Entry.cs @@ -35,6 +35,7 @@ public string Headword() { //order by code to ensure the headword is stable //todo choose ws by preference based on ws order/default + //todo this does not apply morph tokens — see #1284 //https://github.com/sillsdev/languageforge-lexbox/issues/1284 var word = CitationForm.Values.OrderBy(kvp => kvp.Key.Code).FirstOrDefault().Value; if (string.IsNullOrEmpty(word)) word = LexemeForm.Values.OrderBy(kvp => kvp.Key.Code).FirstOrDefault().Value; diff --git a/backend/FwLite/MiniLcm/Models/MorphType.cs b/backend/FwLite/MiniLcm/Models/MorphType.cs index d241a35ff5..281e62c96e 100644 --- a/backend/FwLite/MiniLcm/Models/MorphType.cs +++ b/backend/FwLite/MiniLcm/Models/MorphType.cs @@ -30,7 +30,7 @@ public enum MorphTypeKind public class 
MorphType : IObjectWithId { public virtual Guid Id { get; set; } - public virtual MorphTypeKind Kind { get; set; } + public virtual required MorphTypeKind Kind { get; set; } public virtual MultiString Name { get; set; } = []; public virtual MultiString Abbreviation { get; set; } = []; public virtual RichMultiString Description { get; set; } = []; diff --git a/backend/FwLite/Taskfile.yml b/backend/FwLite/Taskfile.yml index 6bf2a36965..b44a383921 100644 --- a/backend/FwLite/Taskfile.yml +++ b/backend/FwLite/Taskfile.yml @@ -119,3 +119,17 @@ tasks: cmd: dotnet test ../../FwLiteOnly.slnf --filter Category!=Slow test-full: cmd: dotnet test ../../FwLiteOnly.slnf + test-verified: + desc: Run only tests that generate verified snapshot files + # always produces exit status 1, because the framework isn't valid for all test projects + cmd: dotnet test ../../FwLiteOnly.slnf --filter "Category=Verified" --framework net9.0 --logger "console;verbosity=minimal" + reset-verified: + desc: Reset all verified snapshot files to their state on origin/develop + cmds: + - git fetch origin develop + - git checkout origin/develop -- ":(glob)**/*.verified.*" + reverify: + desc: Reset verified snapshots to origin/develop, then re-run verified tests to regenerate them + cmds: + - task: reset-verified + - task: test-verified diff --git a/frontend/viewer/src/project/demo/demo-entry-data.ts b/frontend/viewer/src/project/demo/demo-entry-data.ts index 33b8e32100..1b4013567a 100644 --- a/frontend/viewer/src/project/demo/demo-entry-data.ts +++ b/frontend/viewer/src/project/demo/demo-entry-data.ts @@ -101,6 +101,7 @@ export const writingSystems: IWritingSystems = { export const _entries: IEntry[] = [ { 'id': '34779c06-5a73-4fe9-8325-b110b23f9293', + 'headword': {}, 'lexemeForm': { 'seh': 'nyumba', 'seh-fonipa-x-etic': 'ɲumba', 'ny': 'nyumba' }, 'citationForm': { 'seh': '!nyumba', 'seh-fonipa-x-etic': 'ɲumba', 'ny': 'nyumba' }, 'literalMeaning': {}, @@ -167,6 +168,7 @@ export const _entries: IEntry[] = 
[ }, { 'id': '34779c06-5a73-4fe9-8325-b110b23f9294', + 'headword': {}, 'lexemeForm': { 'seh': 'dance', 'seh-fonipa-x-etic': 'dæns', 'ny': 'vina' }, 'citationForm': {}, 'literalMeaning': {}, diff --git a/frontend/viewer/src/stories/editor/entity-primitives/entry-editor-primitive.stories.svelte b/frontend/viewer/src/stories/editor/entity-primitives/entry-editor-primitive.stories.svelte index e2ce2f2113..4f2b4e4f37 100644 --- a/frontend/viewer/src/stories/editor/entity-primitives/entry-editor-primitive.stories.svelte +++ b/frontend/viewer/src/stories/editor/entity-primitives/entry-editor-primitive.stories.svelte @@ -9,6 +9,9 @@ let entry: IEntry = $state({ id: '36b8f84d-df4e-4d49-b662-bcde71a8764f', + headword: { + 'seh': 'Citation form', + }, lexemeForm: { 'seh': 'Lexeme form', },