Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/Echo/src/EchoEngine/TranslationEngineServiceV1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ await _parallelCorpusService.PreprocessAsync(
(_, i) => new AlignedWordPair() { SourceIndex = i, TargetIndex = i }
),
},
Confidence = 1.0,
}
);
if (row.SourceSegment.Length > 0 && !isInTrainingData)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ CancellationToken cancellationToken
Translation = pretranslation.Translation,
SourceTokens = { pretranslation.SourceTokens },
TranslationTokens = { pretranslation.TranslationTokens },
Confidence = pretranslation.Confidence,
};
if (pretranslation.Alignment is not null)
request.Alignment.Add(pretranslation.Alignment.Select(Map));
Expand Down Expand Up @@ -83,6 +84,7 @@ JsonSerializerOptions options
sourceTokens = [],
translationTokens = [];
IReadOnlyList<SIL.Machine.Corpora.AlignedWordPair> alignedWordPairs = [];
double confidence = 0.0;
while (reader.Read() && reader.TokenType != JsonTokenType.EndObject)
{
if (reader.TokenType == JsonTokenType.PropertyName)
Expand Down Expand Up @@ -128,6 +130,10 @@ JsonSerializerOptions options
reader.Read();
alignedWordPairs = SIL.Machine.Corpora.AlignedWordPair.Parse(reader.GetString()).ToArray();
break;
case "sequenceConfidence":
reader.Read();
confidence = reader.GetDouble();
break;
default:
throw new JsonException(
$"Unexpected property name {s} when deserializing Pretranslation object"
Expand All @@ -145,6 +151,7 @@ JsonSerializerOptions options
Alignment = alignedWordPairs,
SourceTokens = sourceTokens,
TranslationTokens = translationTokens,
Confidence = confidence,
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ public record Pretranslation
public IEnumerable<string>? SourceTokens { get; init; }
public IEnumerable<string>? TranslationTokens { get; init; }
public IReadOnlyList<AlignedWordPair>? Alignment { get; init; }
public double Confidence { get; init; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -238,14 +238,13 @@ private static Translation.V1.TranslationSources Map(SIL.Machine.Translation.Tra

/// <summary>
/// Maps a <see cref="WordAlignmentMatrix"/> to a sequence of
/// <see cref="Translation.V1.AlignedWordPair"/> messages.
/// </summary>
/// <param name="source">The alignment matrix to convert.</param>
/// <returns>
/// One <see cref="Translation.V1.AlignedWordPair"/> per pair returned by
/// <c>source.ToAlignedWordPairs()</c>, carrying that pair's source and target indices.
/// </returns>
private static IEnumerable<Translation.V1.AlignedWordPair> Map(WordAlignmentMatrix source)
{
    // Resolved form of the change: the previous implementation walked every (row, column)
    // cell and yielded a pair for each cell that was set. That loop is replaced by the
    // matrix's own pair enumeration.
    // NOTE(review): presumably ToAlignedWordPairs() reports the same set cells in the same
    // order as the old nested loop - confirm against the SIL.Machine implementation.
    return source
        .ToAlignedWordPairs()
        .Select(wp => new Translation.V1.AlignedWordPair
        {
            SourceIndex = wp.SourceIndex,
            TargetIndex = wp.TargetIndex,
        });
}

private static Translation.V1.Phrase Map(SIL.Machine.Translation.Phrase source)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ await JsonSerializer.SerializeAsync(
["textId"] = "MAT",
["refs"] = new JsonArray { "MAT 1:1" },
["translation"] = "translation",
["sequenceConfidence"] = 0.5,
},
}
);
Expand All @@ -44,6 +45,7 @@ await JsonSerializer.SerializeAsync(
SourceRefs = { },
TargetRefs = { "MAT 1:1" },
Translation = "translation",
Confidence = 0.5,
},
Arg.Any<CancellationToken>()
);
Expand All @@ -67,7 +69,10 @@ await JsonSerializer.SerializeAsync(
["textId"] = "MAT",
["sourceRefs"] = new JsonArray { "MAT 1:1" },
["targetRefs"] = new JsonArray { "MAT 1:1" },
["sourceTokens"] = new JsonArray { "translation" },
["translationTokens"] = new JsonArray { "translation" },
["translation"] = "translation",
["alignment"] = "0-0",
},
}
);
Expand All @@ -88,6 +93,13 @@ await JsonSerializer.SerializeAsync(
SourceRefs = { "MAT 1:1" },
TargetRefs = { "MAT 1:1" },
Translation = "translation",
SourceTokens = { "translation" },
TranslationTokens = { "translation" },
Alignment =
{
new Translation.V1.AlignedWordPair { SourceIndex = 0, TargetIndex = 0 },
},
Confidence = 0.0,
},
Arg.Any<CancellationToken>()
);
Expand Down
3 changes: 3 additions & 0 deletions src/Serval/src/Serval.Client/Client.g.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11659,6 +11659,9 @@ public partial class Pretranslation
[System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)]
public string Translation { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("confidence", Required = Newtonsoft.Json.Required.Always)]
public double Confidence { get; set; } = default!;

}

[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.6.3.0 (NJsonSchema v11.5.2.0 (Newtonsoft.Json v13.0.0.0))")]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ message InsertPretranslationsRequest {
repeated string source_tokens = 7;
repeated string translation_tokens = 8;
repeated AlignedWordPair alignment = 9;
double confidence = 10;
}

message UpdateBuildExecutionDataRequest {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ public record PretranslationDto
[Obsolete]
public IReadOnlyList<string>? Refs { get; init; }
public required string Translation { get; init; }
public double Confidence { get; init; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -2004,6 +2004,7 @@ private static PretranslationDto Map(Pretranslation source)
TargetRefs = source.TargetRefs ?? [],
Refs = source.Refs,
Translation = source.Translation,
Confidence = source.Confidence ?? -1.0,
};
}

Expand Down
1 change: 1 addition & 0 deletions src/Serval/src/Serval.Translation/Models/Pretranslation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@ public class Pretranslation : IEntity
public IReadOnlyList<string>? SourceTokens { get; init; }
public IReadOnlyList<string>? TranslationTokens { get; init; }
public IReadOnlyList<AlignedWordPair>? Alignment { get; init; }
public double? Confidence { get; init; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ ServerCallContext context
SourceTokens = request.SourceTokens,
TranslationTokens = request.TranslationTokens,
Alignment = request.Alignment.Select(Map).ToList(),
Confidence = request.Confidence,
}
);
if (batch.Count == PretranslationInsertBatchSize)
Expand Down
8 changes: 7 additions & 1 deletion src/Serval/test/Serval.E2ETests/ServalApiTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -263,18 +263,21 @@ public async Task Nmt_Paratext()
},
];
_helperClient.TranslationBuildConfig.Options =
"{\"max_steps\":10, \"use_key_terms\":true, \"train_params\": {\"per_device_train_batch_size\":4}}";
"{\"max_steps\":50, \"use_key_terms\":true, \"parent_model_name\": \"facebook/nllb-200-distilled-600M\", \"train_params\": {\"per_device_train_batch_size\":4}, \"generate_params\":{\"num_beams\": 2}}";

await _helperClient.BuildEngineAsync(engineId);
Assert.That(
(await _helperClient.TranslationEnginesClient.GetAllBuildsAsync(engineId)).First().State,
Is.EqualTo(JobState.Completed)
);

IList<Pretranslation> translations = await _helperClient.TranslationEnginesClient.GetAllPretranslationsAsync(
engineId,
inferencingParallelCorpusId
);
Assert.That(translations, Is.Not.Empty);
Assert.That(translations[0].Confidence, Is.GreaterThan(0.0));

IList<Pretranslation> firstJohnTranslations =
await _helperClient.TranslationEnginesClient.GetAllPretranslationsAsync(
engineId,
Expand All @@ -283,19 +286,22 @@ await _helperClient.TranslationEnginesClient.GetAllPretranslationsAsync(
);
// Only non-scripture was translated
Assert.That(firstJohnTranslations.All(t => t.TargetRefs[0].Contains('/')));

string usfm = await _helperClient.TranslationEnginesClient.GetPretranslatedUsfmAsync(
engineId,
inferencingParallelCorpusId,
"REV"
);
Assert.That(usfm, Does.Contain("\\v 1"));

string usfmWithPlacedMarkers = await _helperClient.TranslationEnginesClient.GetPretranslatedUsfmAsync(
engineId,
inferencingParallelCorpusId,
"REV",
paragraphMarkerBehavior: PretranslationUsfmMarkerBehavior.PreservePosition
);
Assert.That(usfmWithPlacedMarkers, Is.Not.EqualTo(usfm));

string usfmWithDenormalizedQuotes = await _helperClient.TranslationEnginesClient.GetPretranslatedUsfmAsync(
engineId,
inferencingParallelCorpusId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1242,7 +1242,7 @@ public async Task UpdateCorpusAsync()
}

[Test]
public async Task DeletePretranslationsWhenParallelCorpusIsUpdatedAsync()
public async Task DeleteWordAlignmentsWhenParallelCorpusIsUpdatedAsync()
{
var env = new TestEnvironment();
Models.WordAlignment wordAlignment = new()
Expand All @@ -1266,7 +1266,7 @@ public async Task DeletePretranslationsWhenParallelCorpusIsUpdatedAsync()
}

[Test]
public async Task DeletePretranslationsWhenCorpusFilesAreDeletedAsync()
public async Task DeleteWordAlignmentsWhenCorpusFilesAreDeletedAsync()
{
var env = new TestEnvironment();
Models.WordAlignment wordAlignment = new()
Expand All @@ -1290,7 +1290,7 @@ public async Task DeletePretranslationsWhenCorpusFilesAreDeletedAsync()
}

[Test]
public async Task DeletePretranslationsWhenCorpusFilesAreUpdatedAsync()
public async Task DeleteWordAlignmentsWhenCorpusFilesAreUpdatedAsync()
{
var env = new TestEnvironment();
Models.WordAlignment wordAlignment = new()
Expand Down
Loading