Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
219 changes: 187 additions & 32 deletions src/BloomExe/ImageProcessing/ImageUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1523,12 +1523,9 @@ Rectangle cropRectangle
if (result.ExitCode != 0)
{
LogGraphicsMagickFailure(result);
return result;
}

var metadata = RobustFileIO.MetadataFromFile(sourcePath);
if (metadata != null && metadata.ExceptionCaughtWhileLoading == null)
metadata.Write(destPath);

TrimMetadataInImage(sourcePath, destPath);
return result;
}

Expand Down Expand Up @@ -2171,7 +2168,8 @@ private static T WithSafeFilePath<T>(
public static void ReallyCropImages(
SafeXmlDocument bookDom,
string imageSourceFolder,
string imageDestFolder
string imageDestFolder,
bool alsoTrimMetadataForUncroppedImages = false
)
{
var images = bookDom.SafeSelectNodes("//img").Cast<SafeXmlElement>().ToArray();
Expand Down Expand Up @@ -2216,39 +2214,105 @@ string imageDestFolder
var canvasElement = imgContainer?.ParentNode as SafeXmlElement;
var canvasElementStyle = canvasElement?.GetAttribute("style");
if (!SignifiesCropping(style) || canvasElementStyle == null)
continue;
var canvasElementWidth = GetNumberFromPx("width", canvasElementStyle);
var canvasElementHeight = GetNumberFromPx("height", canvasElementStyle);

var key = $"{src}|{style}|{canvasElementWidth}|{canvasElementHeight}";
if (cropped.TryGetValue(key, out string fileName))
{
// This is a duplicate. We can use the same cropped image file.
img.SetAttribute("src", fileName);
// With that src, it's already cropped, so remove the style to avoid
// applying the cropping again to the already-cropped image.
img.RemoveAttribute("style");
continue;
if (alsoTrimMetadataForUncroppedImages)
TrimTheMetadata(
imageSourceFolder,
imageDestFolder,
cropped,
srcUsageCount,
replacedOriginals,
img,
src,
style
);
}
else
{
CropTheImageAndMetadata(
imageSourceFolder,
imageDestFolder,
uncroppedSrcNames,
cropped,
srcUsageCount,
replacedOriginals,
img,
src,
style,
canvasElementStyle
);
}
}
DeleteOrphanedImageFiles(imageSourceFolder, imageDestFolder, images, replacedOriginals);
}

var needNewName = uncroppedSrcNames.Contains(src) || srcUsageCount[src] > 1;
private static void TrimTheMetadata(
string imageSourceFolder,
string imageDestFolder,
Dictionary<string, string> cropped,
Dictionary<string, int> srcUsageCount,
HashSet<string> replacedOriginals,
SafeXmlElement img,
string src,
string style
)
{
var key = $"{src}|{style}|0|0";
if (cropped.TryGetValue(key, out string fileName))
{
// This is a duplicate. We can use the same metadata-cropped image file.
img.SetAttribute("src", fileName);
return;
}
var trimmedFileName = TrimMetadataInImage(img, imageSourceFolder, imageDestFolder);
cropped[key] = src;
}

var croppedFileName = ReallyCropImage(
img,
imageSourceFolder,
imageDestFolder,
needNewName
);
private static void CropTheImageAndMetadata(
string imageSourceFolder,
string imageDestFolder,
HashSet<string> uncroppedSrcNames,
Dictionary<string, string> cropped,
Dictionary<string, int> srcUsageCount,
HashSet<string> replacedOriginals,
SafeXmlElement img,
string src,
string style,
string canvasElementStyle
)
{
var canvasElementWidth = GetNumberFromPx("width", canvasElementStyle);
var canvasElementHeight = GetNumberFromPx("height", canvasElementStyle);

// Track if we replaced an original file with a new one
if (croppedFileName != null && needNewName && croppedFileName != src)
{
replacedOriginals.Add(src);
}
var key = $"{src}|{style}|{canvasElementWidth}|{canvasElementHeight}";
if (cropped.TryGetValue(key, out string fileName))
{
// This is a duplicate. We can use the same cropped image file.
img.SetAttribute("src", fileName);
// With that src, it's already cropped, so remove the style to avoid
// applying the cropping again to the already-cropped image.
img.RemoveAttribute("style");
return;
}

cropped[key] = croppedFileName;
var needNewName = uncroppedSrcNames.Contains(src) || srcUsageCount[src] > 1;

var croppedFileName = ReallyCropImage(
img,
imageSourceFolder,
imageDestFolder,
needNewName
);

// Track if we replaced an original file with a new one
if (croppedFileName != null && needNewName && croppedFileName != src)
{
replacedOriginals.Add(src);
}
DeleteOrphanedImageFiles(imageSourceFolder, imageDestFolder, images, replacedOriginals);
if (croppedFileName != null)
cropped[key] = croppedFileName;
else
cropped[key] = src;
}

private static void DeleteOrphanedImageFiles(
Expand Down Expand Up @@ -2315,6 +2379,97 @@ private static bool SignifiesCropping(string imgStyle)
return width > 0;
}

private static string TrimMetadataInImage(string srcPath, string destPath)
{
// Try to reduce the metadata to just what we want for intellectual property and
// collection information.
try
{
var metadata = RobustFileIO.MetadataFromFile(srcPath);
if (metadata != null && metadata.ExceptionCaughtWhileLoading == null)
{
try
{
// Remove all metadata from the source file as well and then restore the
// minimal metadata we care about. The existing metadata can sometimes
// cause problems in the the TagSharp library. (BL-16058)
using (var tagFile = RobustFileIO.CreateTaglibFile(destPath))
{
tagFile.RemoveTags(TagTypes.AllTags);
tagFile.Save();
}
var newMeta = new Metadata
{
Creator = metadata.Creator,
License = metadata.License,
CopyrightNotice = metadata.CopyrightNotice,
AttributionUrl = metadata.AttributionUrl,
CollectionName = metadata.CollectionName,
CollectionUri = metadata.CollectionUri,
};
newMeta.WriteIntellectualPropertyOnly(destPath);
}
catch (Exception e)
{
Logger.WriteError(
$"Error copying metadata from {srcPath} to {destPath}. ",
e
);
}
}
}
catch (Exception e)
{
// This can happen in unit tests that have fake data.
Debug.WriteLine(
$"Exception caught while trying to trim metadata from {srcPath} to {destPath}.",
e
);
}
return destPath;
}

private static string TrimMetadataInImage(
SafeXmlElement img,
string imageSourceFolder,
string imageDestFolder
)
{
var src = img.GetAttribute("src");
var srcPath = UrlPathString.GetFullyDecodedPath(imageSourceFolder, ref src);
if (!RobustFile.Exists(srcPath))
return null;

var ext = Path.GetExtension(srcPath).ToLowerInvariant();
if (ext == ".svg")
{
// SVG files don't have metadata to trim.
return null;
}
var trimmedFilePath = Path.ChangeExtension(
Path.Combine(imageDestFolder, Guid.NewGuid().ToString()),
ext
);

// We need an output file even if it's an identical copy.
RobustFile.Copy(srcPath, trimmedFilePath, true);

TrimMetadataInImage(srcPath, trimmedFilePath);
// Capture this before we unencode, since it wants
// to be a value we can put in a src attribute (if it doesn't get changed)
// to lead to the file we may put at an unchanged file name.
var result = img.GetAttribute("src");
// We don't need the path here, but this function may correct 'src' (e.g.,
// removing some url encoding to find the actual file). And since we're not
// using a new name, we want to exactly overwrite the original image file.
UrlPathString.GetFullyDecodedPath(imageSourceFolder, ref src);
var destPath = Path.Combine(imageDestFolder, src);
RobustFile.Move(trimmedFilePath, destPath, true);
// If it failed, it should have already logged the reason. I think all we can do
// is leave the image alone.
return result;
}

private static string ReallyCropImage(
SafeXmlElement img,
string imageSourceFolder,
Expand Down
8 changes: 6 additions & 2 deletions src/BloomExe/Publish/PublishHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1051,7 +1051,8 @@ public static Book.Book MakeDeviceXmatterTempBook(
ImageUtils.ReallyCropImages(
modifiedBook.RawDom,
modifiedBook.FolderPath,
modifiedBook.FolderPath
modifiedBook.FolderPath,
true
);
// Must come after ReallyCropImages, because any cropping for background images is
// destroyed by SimplifyBackgroundImages.
Expand All @@ -1065,7 +1066,10 @@ public static Book.Book MakeDeviceXmatterTempBook(
var childrenToRemove = dataDiv
.ChildNodes.OfType<SafeXmlElement>()
.Where(child =>
child.ChildNodes.OfType<SafeXmlElement>().FirstOrDefault()?.HasClass("bloom-canvas")
child
.ChildNodes.OfType<SafeXmlElement>()
.FirstOrDefault()
?.HasClass("bloom-canvas")
?? false
)
.ToArray();
Expand Down
2 changes: 1 addition & 1 deletion src/BloomExe/Spreadsheet/SpreadsheetExporter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ public InternalSpreadsheet Export(
{
if (_outputImageFolder != null)
{
ImageUtils.ReallyCropImages(dom.RawDom, bookFolderPath, _outputImageFolder);
ImageUtils.ReallyCropImages(dom.RawDom, bookFolderPath, _outputImageFolder, true);
}

_progress = progress ?? new NullWebSocketProgress();
Expand Down
7 changes: 6 additions & 1 deletion src/BloomExe/WebLibraryIntegration/BookUpload.cs
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,12 @@ await PublishHelper.ReportInvalidFontsAsync(
var htmlFile = BookStorage.FindBookHtmlInFolder(stagingDirectory);
var xmlDomFromHtmlFile = XmlHtmlConverter.GetXmlDomFromHtmlFile(htmlFile, false);

ImageUtils.ReallyCropImages(xmlDomFromHtmlFile, stagingDirectory, stagingDirectory);
ImageUtils.ReallyCropImages(
xmlDomFromHtmlFile,
stagingDirectory,
stagingDirectory,
true
);
PublishHelper.SimplifyBackgroundImages(xmlDomFromHtmlFile); // after really cropping

XmlHtmlConverter.SaveDOMAsHtml5(xmlDomFromHtmlFile, htmlFile);
Expand Down
71 changes: 71 additions & 0 deletions src/BloomTests/ImageProcessing/ImageUtilsTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -848,6 +848,77 @@ public void ReallyCropImages_SameFolderOrphanedFile_DeletesUnreferencedOriginal(
}
}

[Test]
public void ReallyCropImages_SameFolderWithUncropped_KeepsOriginalFile_WithMetadataChanged()
{
// This test verifies that when an image appears both cropped and uncropped,
// the original file is kept (not orphaned).

using (var folder = new TemporaryFolder("KeepOriginalTest"))
{
var imagePath = Path.Combine(folder.Path, "shared.png");
var _pathToTestImages = "src\\BloomTests\\ImageProcessing\\images";
var sourcePath = FileLocationUtilities.GetFileDistributedWithApplication(
_pathToTestImages,
"bird.png"
);
RobustFile.Copy(sourcePath, imagePath);

// Create a DOM with both cropped and uncropped usage
var dom = new HtmlDom(
@"<html><head></head><body>
<div class=""bloom-page"">
<div class=""marginBox"">
<div class=""bloom-canvas"">"
+ MakeImageCanvasElement(
"uncroppedImg",
"shared.png",
"height: 300px; left: 10px; top: 10px; width: 200px;"
)
+ MakeImageCanvasElement(
"croppedImg",
"shared.png",
"height: 300px; left: 10px; top: 10px; width: 200px;",
"width: 400px; left: -50px; top: -50px"
)
+ @"</div>
</div>
</div>
</body></html>"
);

var originalBytes = RobustFile.ReadAllBytes(imagePath);

// SUT
ImageUtils.ReallyCropImages(dom.RawDom, folder.Path, folder.Path, true);

// Verify the original file still exists
Assert.That(
File.Exists(imagePath),
Is.True,
"Original file should be kept when still referenced by uncropped image"
);

// Verify the file content has changed due to fixing the metadata.
var currentBytes = RobustFile.ReadAllBytes(imagePath);
Assert.That(
currentBytes.Length,
Is.Not.EqualTo(originalBytes.Length),
"Original file content has lost some metadata"
);

// Verify uncropped image still references original
var uncroppedImg = dom.SelectSingleNode("//img[@id='uncroppedImg']");
Assert.That(uncroppedImg.GetAttribute("src"), Is.EqualTo("shared.png"));

// Verify cropped image has new name
var croppedImg = dom.SelectSingleNode("//img[@id='croppedImg']");
var croppedSrc = croppedImg.GetAttribute("src");
Assert.That(croppedSrc, Is.Not.EqualTo("shared.png"));
Assert.That(File.Exists(Path.Combine(folder.Path, croppedSrc)), Is.True);
}
}

[Test]
public void ReallyCropImages_SameFolderWithUncropped_KeepsOriginalFile()
{
Expand Down