diff --git a/src/BloomExe/ImageProcessing/ImageUtils.cs b/src/BloomExe/ImageProcessing/ImageUtils.cs index 66a84b7acd9b..27ab7e865ba0 100644 --- a/src/BloomExe/ImageProcessing/ImageUtils.cs +++ b/src/BloomExe/ImageProcessing/ImageUtils.cs @@ -1523,12 +1523,9 @@ Rectangle cropRectangle if (result.ExitCode != 0) { LogGraphicsMagickFailure(result); + return result; } - - var metadata = RobustFileIO.MetadataFromFile(sourcePath); - if (metadata != null && metadata.ExceptionCaughtWhileLoading == null) - metadata.Write(destPath); - + TrimMetadataInImage(sourcePath, destPath); return result; } @@ -2171,7 +2168,8 @@ private static T WithSafeFilePath( public static void ReallyCropImages( SafeXmlDocument bookDom, string imageSourceFolder, - string imageDestFolder + string imageDestFolder, + bool alsoTrimMetadataForUncroppedImages = false ) { var images = bookDom.SafeSelectNodes("//img").Cast().ToArray(); @@ -2216,39 +2214,105 @@ string imageDestFolder var canvasElement = imgContainer?.ParentNode as SafeXmlElement; var canvasElementStyle = canvasElement?.GetAttribute("style"); if (!SignifiesCropping(style) || canvasElementStyle == null) - continue; - var canvasElementWidth = GetNumberFromPx("width", canvasElementStyle); - var canvasElementHeight = GetNumberFromPx("height", canvasElementStyle); - - var key = $"{src}|{style}|{canvasElementWidth}|{canvasElementHeight}"; - if (cropped.TryGetValue(key, out string fileName)) { - // This is a duplicate. We can use the same cropped image file. - img.SetAttribute("src", fileName); - // With that src, it's already cropped, so remove the style to avoid - // applying the cropping again to the already-cropped image. - img.RemoveAttribute("style"); - continue; + if (alsoTrimMetadataForUncroppedImages) + TrimTheMetadata( + imageSourceFolder, + imageDestFolder, + cropped, + srcUsageCount, + replacedOriginals, + img, + src, + style + ); + } + else + { + CropTheImageAndMetadata( + imageSourceFolder, + imageDestFolder, + uncroppedSrcNames, + cropped, + srcUsageCount, + replacedOriginals, + img, + src, + style, + canvasElementStyle + ); } + } + DeleteOrphanedImageFiles(imageSourceFolder, imageDestFolder, images, replacedOriginals); + } - var needNewName = uncroppedSrcNames.Contains(src) || srcUsageCount[src] > 1; + private static void TrimTheMetadata( + string imageSourceFolder, + string imageDestFolder, + Dictionary cropped, + Dictionary srcUsageCount, + HashSet replacedOriginals, + SafeXmlElement img, + string src, + string style + ) + { + var key = $"{src}|{style}|0|0"; + if (cropped.TryGetValue(key, out string fileName)) + { + // This is a duplicate. We can use the same metadata-cropped image file. + img.SetAttribute("src", fileName); + return; + } + var trimmedFileName = TrimMetadataInImage(img, imageSourceFolder, imageDestFolder); + cropped[key] = src; + } - var croppedFileName = ReallyCropImage( - img, - imageSourceFolder, - imageDestFolder, - needNewName - ); + private static void CropTheImageAndMetadata( + string imageSourceFolder, + string imageDestFolder, + HashSet uncroppedSrcNames, + Dictionary cropped, + Dictionary srcUsageCount, + HashSet replacedOriginals, + SafeXmlElement img, + string src, + string style, + string canvasElementStyle + ) + { + var canvasElementWidth = GetNumberFromPx("width", canvasElementStyle); + var canvasElementHeight = GetNumberFromPx("height", canvasElementStyle); - // Track if we replaced an original file with a new one - if (croppedFileName != null && needNewName && croppedFileName != src) - { - replacedOriginals.Add(src); - } + var key = $"{src}|{style}|{canvasElementWidth}|{canvasElementHeight}"; + if (cropped.TryGetValue(key, out string fileName)) + { + // This is a duplicate. We can use the same cropped image file. + img.SetAttribute("src", fileName); + // With that src, it's already cropped, so remove the style to avoid + // applying the cropping again to the already-cropped image. + img.RemoveAttribute("style"); + return; + } - cropped[key] = croppedFileName; + var needNewName = uncroppedSrcNames.Contains(src) || srcUsageCount[src] > 1; + + var croppedFileName = ReallyCropImage( + img, + imageSourceFolder, + imageDestFolder, + needNewName + ); + + // Track if we replaced an original file with a new one + if (croppedFileName != null && needNewName && croppedFileName != src) + { + replacedOriginals.Add(src); } - DeleteOrphanedImageFiles(imageSourceFolder, imageDestFolder, images, replacedOriginals); + if (croppedFileName != null) + cropped[key] = croppedFileName; + else + cropped[key] = src; } private static void DeleteOrphanedImageFiles( @@ -2315,6 +2379,97 @@ private static bool SignifiesCropping(string imgStyle) return width > 0; } + private static string TrimMetadataInImage(string srcPath, string destPath) + { + // Try to reduce the metadata to just what we want for intellectual property and + // collection information. + try + { + var metadata = RobustFileIO.MetadataFromFile(srcPath); + if (metadata != null && metadata.ExceptionCaughtWhileLoading == null) + { + try + { + // Remove all metadata from the source file as well and then restore the + // minimal metadata we care about. The existing metadata can sometimes + // cause problems in the the TagSharp library. (BL-16058) + using (var tagFile = RobustFileIO.CreateTaglibFile(destPath)) + { + tagFile.RemoveTags(TagTypes.AllTags); + tagFile.Save(); + } + var newMeta = new Metadata + { + Creator = metadata.Creator, + License = metadata.License, + CopyrightNotice = metadata.CopyrightNotice, + AttributionUrl = metadata.AttributionUrl, + CollectionName = metadata.CollectionName, + CollectionUri = metadata.CollectionUri, + }; + newMeta.WriteIntellectualPropertyOnly(destPath); + } + catch (Exception e) + { + Logger.WriteError( + $"Error copying metadata from {srcPath} to {destPath}. ", + e + ); + } + } + } + catch (Exception e) + { + // This can happen in unit tests that have fake data. + Debug.WriteLine( + $"Exception caught while trying to trim metadata from {srcPath} to {destPath}.", + e + ); + } + return destPath; + } + + private static string TrimMetadataInImage( + SafeXmlElement img, + string imageSourceFolder, + string imageDestFolder + ) + { + var src = img.GetAttribute("src"); + var srcPath = UrlPathString.GetFullyDecodedPath(imageSourceFolder, ref src); + if (!RobustFile.Exists(srcPath)) + return null; + + var ext = Path.GetExtension(srcPath).ToLowerInvariant(); + if (ext == ".svg") + { + // SVG files don't have metadata to trim. + return null; + } + var trimmedFilePath = Path.ChangeExtension( + Path.Combine(imageDestFolder, Guid.NewGuid().ToString()), + ext + ); + + // We need an output file even if it's an identical copy. + RobustFile.Copy(srcPath, trimmedFilePath, true); + + TrimMetadataInImage(srcPath, trimmedFilePath); + // Capture this before we unencode, since it wants + // to be a value we can put in a src attribute (if it doesn't get changed) + // to lead to the file we may put at an unchanged file name. + var result = img.GetAttribute("src"); + // We don't need the path here, but this function may correct 'src' (e.g., + // removing some url encoding to find the actual file). And since we're not + // using a new name, we want to exactly overwrite the original image file. + UrlPathString.GetFullyDecodedPath(imageSourceFolder, ref src); + var destPath = Path.Combine(imageDestFolder, src); + RobustFile.Move(trimmedFilePath, destPath, true); + // If it failed, it should have already logged the reason. I think all we can do + // is leave the image alone. + return result; + } + private static string ReallyCropImage( SafeXmlElement img, string imageSourceFolder, diff --git a/src/BloomExe/Publish/PublishHelper.cs b/src/BloomExe/Publish/PublishHelper.cs index b9f356388254..2228dd244753 100644 --- a/src/BloomExe/Publish/PublishHelper.cs +++ b/src/BloomExe/Publish/PublishHelper.cs @@ -1051,7 +1051,8 @@ public static Book.Book MakeDeviceXmatterTempBook( ImageUtils.ReallyCropImages( modifiedBook.RawDom, modifiedBook.FolderPath, - modifiedBook.FolderPath + modifiedBook.FolderPath, + true ); // Must come after ReallyCropImages, because any cropping for background images is // destroyed by SimplifyBackgroundImages. @@ -1065,7 +1066,10 @@ public static Book.Book MakeDeviceXmatterTempBook( var childrenToRemove = dataDiv .ChildNodes.OfType() .Where(child => - child.ChildNodes.OfType().FirstOrDefault()?.HasClass("bloom-canvas") + child + .ChildNodes.OfType() + .FirstOrDefault() + ?.HasClass("bloom-canvas") ?? false ) .ToArray(); diff --git a/src/BloomExe/Spreadsheet/SpreadsheetExporter.cs b/src/BloomExe/Spreadsheet/SpreadsheetExporter.cs index 40a3903ab180..0d9b07b99ff0 100644 --- a/src/BloomExe/Spreadsheet/SpreadsheetExporter.cs +++ b/src/BloomExe/Spreadsheet/SpreadsheetExporter.cs @@ -121,7 +121,7 @@ public InternalSpreadsheet Export( { if (_outputImageFolder != null) { - ImageUtils.ReallyCropImages(dom.RawDom, bookFolderPath, _outputImageFolder); + ImageUtils.ReallyCropImages(dom.RawDom, bookFolderPath, _outputImageFolder, true); } _progress = progress ?? new NullWebSocketProgress(); diff --git a/src/BloomExe/WebLibraryIntegration/BookUpload.cs b/src/BloomExe/WebLibraryIntegration/BookUpload.cs index 9166deeeb3a5..0111480471ff 100644 --- a/src/BloomExe/WebLibraryIntegration/BookUpload.cs +++ b/src/BloomExe/WebLibraryIntegration/BookUpload.cs @@ -553,7 +553,12 @@ await PublishHelper.ReportInvalidFontsAsync( var htmlFile = BookStorage.FindBookHtmlInFolder(stagingDirectory); var xmlDomFromHtmlFile = XmlHtmlConverter.GetXmlDomFromHtmlFile(htmlFile, false); - ImageUtils.ReallyCropImages(xmlDomFromHtmlFile, stagingDirectory, stagingDirectory); + ImageUtils.ReallyCropImages( + xmlDomFromHtmlFile, + stagingDirectory, + stagingDirectory, + true + ); PublishHelper.SimplifyBackgroundImages(xmlDomFromHtmlFile); // after really cropping XmlHtmlConverter.SaveDOMAsHtml5(xmlDomFromHtmlFile, htmlFile); diff --git a/src/BloomTests/ImageProcessing/ImageUtilsTests.cs b/src/BloomTests/ImageProcessing/ImageUtilsTests.cs index 44f787037a81..145fd8f34a35 100644 --- a/src/BloomTests/ImageProcessing/ImageUtilsTests.cs +++ b/src/BloomTests/ImageProcessing/ImageUtilsTests.cs @@ -848,6 +848,77 @@ public void ReallyCropImages_SameFolderOrphanedFile_DeletesUnreferencedOriginal( } } + [Test] + public void ReallyCropImages_SameFolderWithUncropped_KeepsOriginalFile_WithMetadataChanged() + { + // This test verifies that when an image appears both cropped and uncropped, + // the original file is kept (not orphaned). + + using (var folder = new TemporaryFolder("KeepOriginalTest")) + { + var imagePath = Path.Combine(folder.Path, "shared.png"); + var _pathToTestImages = "src\\BloomTests\\ImageProcessing\\images"; + var sourcePath = FileLocationUtilities.GetFileDistributedWithApplication( + _pathToTestImages, + "bird.png" + ); + RobustFile.Copy(sourcePath, imagePath); + + // Create a DOM with both cropped and uncropped usage + var dom = new HtmlDom( + @" +
+
+
" + + MakeImageCanvasElement( + "uncroppedImg", + "shared.png", + "height: 300px; left: 10px; top: 10px; width: 200px;" + ) + + MakeImageCanvasElement( + "croppedImg", + "shared.png", + "height: 300px; left: 10px; top: 10px; width: 200px;", + "width: 400px; left: -50px; top: -50px" + ) + + @"
+
+
+ " + ); + + var originalBytes = RobustFile.ReadAllBytes(imagePath); + + // SUT + ImageUtils.ReallyCropImages(dom.RawDom, folder.Path, folder.Path, true); + + // Verify the original file still exists + Assert.That( + File.Exists(imagePath), + Is.True, + "Original file should be kept when still referenced by uncropped image" + ); + + // Verify the file content has changed due to fixing the metadata. + var currentBytes = RobustFile.ReadAllBytes(imagePath); + Assert.That( + currentBytes.Length, + Is.Not.EqualTo(originalBytes.Length), + "Original file content has lost some metadata" + ); + + // Verify uncropped image still references original + var uncroppedImg = dom.SelectSingleNode("//img[@id='uncroppedImg']"); + Assert.That(uncroppedImg.GetAttribute("src"), Is.EqualTo("shared.png")); + + // Verify cropped image has new name + var croppedImg = dom.SelectSingleNode("//img[@id='croppedImg']"); + var croppedSrc = croppedImg.GetAttribute("src"); + Assert.That(croppedSrc, Is.Not.EqualTo("shared.png")); + Assert.That(File.Exists(Path.Combine(folder.Path, croppedSrc)), Is.True); + } + } + [Test] public void ReallyCropImages_SameFolderWithUncropped_KeepsOriginalFile() {