diff --git a/app/src/main/java/org/fairscan/app/data/DocumentMetadata.kt b/app/src/main/java/org/fairscan/app/data/DocumentMetadata.kt index 9314330..c3c0fca 100644 --- a/app/src/main/java/org/fairscan/app/data/DocumentMetadata.kt +++ b/app/src/main/java/org/fairscan/app/data/DocumentMetadata.kt @@ -15,6 +15,7 @@ package org.fairscan.app.data import kotlinx.serialization.Serializable +import org.fairscan.imageprocessing.CameraIntrinsics import org.fairscan.imageprocessing.ColorMode @Serializable @@ -44,6 +45,8 @@ data class PageV2( val userQuad: NormalizedQuad? = null, val isColored: Boolean? = null, val colorMode: ColorMode? = null, + val focalLength: Float? = null, + val sensorWidth: Float? = null, ) @Serializable diff --git a/app/src/main/java/org/fairscan/app/data/ImageRepository.kt b/app/src/main/java/org/fairscan/app/data/ImageRepository.kt index e388b39..86dbf8b 100644 --- a/app/src/main/java/org/fairscan/app/data/ImageRepository.kt +++ b/app/src/main/java/org/fairscan/app/data/ImageRepository.kt @@ -35,6 +35,7 @@ import org.fairscan.app.domain.ScanPage import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.Point import org.fairscan.imageprocessing.Quad +import org.fairscan.imageprocessing.cameraIntrinsics import java.io.File import java.util.Collections.synchronizedMap @@ -153,6 +154,8 @@ class ImageRepository( manualRotationDegrees = Rotation.R0.degrees, isColored = metadata.autoColorMode == ColorMode.COLOR, colorMode = colorMode, + focalLength = metadata.cameraIntrinsics?.focalLength, + sensorWidth = metadata.cameraIntrinsics?.sensorWidth, ) ) saveMetadata() @@ -215,8 +218,7 @@ class ImageRepository( val processedJpeg = transformations.process( sourceJpeg, - normalizedQuad = update.normalizedQuad, - baseRotation = metadata.baseRotation, + metadata = metadata.copy(normalizedQuad = update.normalizedQuad), colorMode = update.colorMode ) processedFile.writeBytes(processedJpeg.bytes) @@ -403,6 +405,7 @@ fun PageV2.toMetadata(): 
PageMetadata? { return PageMetadata( (userQuad ?: quad).toQuad(), Rotation.fromDegrees(baseRotationDegrees), - if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE + if (isColored) ColorMode.COLOR else ColorMode.GRAYSCALE, + cameraIntrinsics(focalLength, sensorWidth) ) } diff --git a/app/src/main/java/org/fairscan/app/data/ImageTransformations.kt b/app/src/main/java/org/fairscan/app/data/ImageTransformations.kt index ef42a8b..12bbf38 100644 --- a/app/src/main/java/org/fairscan/app/data/ImageTransformations.kt +++ b/app/src/main/java/org/fairscan/app/data/ImageTransformations.kt @@ -15,9 +15,8 @@ package org.fairscan.app.data import org.fairscan.app.domain.Jpeg -import org.fairscan.app.domain.Rotation +import org.fairscan.app.domain.PageMetadata import org.fairscan.imageprocessing.ColorMode -import org.fairscan.imageprocessing.Quad interface ImageTransformations { @@ -27,8 +26,7 @@ interface ImageTransformations { fun process( source: Jpeg, - normalizedQuad: Quad, - baseRotation: Rotation, + metadata: PageMetadata, colorMode: ColorMode ): Jpeg diff --git a/app/src/main/java/org/fairscan/app/domain/ExportPreparation.kt b/app/src/main/java/org/fairscan/app/domain/ExportPreparation.kt index 8d6d816..bb22b58 100644 --- a/app/src/main/java/org/fairscan/app/domain/ExportPreparation.kt +++ b/app/src/main/java/org/fairscan/app/domain/ExportPreparation.kt @@ -51,8 +51,7 @@ suspend fun jpegsForExport( val colorMode = page.colorMode if (source != null && metadata != null && colorMode != null) { val rotation = page.totalRotation() - val normalizedQuad = metadata.normalizedQuad - processedImage(source, normalizedQuad, rotation, colorMode, exportQuality) + processedImage(source, metadata, rotation, colorMode, exportQuality) } else jpeg(page, imageRepository) diff --git a/app/src/main/java/org/fairscan/app/domain/Page.kt b/app/src/main/java/org/fairscan/app/domain/Page.kt index 464853a..09dfa13 100644 --- a/app/src/main/java/org/fairscan/app/domain/Page.kt +++ 
/**
 * Re-renders a page from its source JPEG using the stored page metadata.
 *
 * The rotation applied is the page's own base rotation taken from [metadata],
 * and the output always uses the [ExportQuality.BALANCED] preset.
 *
 * @param source the original captured JPEG
 * @param metadata page metadata forwarded to [processedImage]
 * @param colorMode color mode to render the page with
 * @return the processed page as a JPEG
 */
override fun process(
    source: Jpeg,
    metadata: PageMetadata,
    colorMode: ColorMode
): Jpeg = processedImage(
    source = source,
    metadata = metadata,
    rotation = metadata.baseRotation,
    colorMode = colorMode,
    exportQuality = ExportQuality.BALANCED,
)
= null try { sourceMat = source.toMat() - val quad = normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height()) - page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels) + val quad = metadata.normalizedQuad.scaledTo(1, 1, sourceMat.width(), sourceMat.height()) + page = extractDocument(sourceMat, quad, rotationDegrees, colorMode, exportQuality.maxPixels, + metadata.cameraIntrinsics) return Jpeg.fromMat(page, exportQuality.jpegQuality) } finally { sourceMat?.release() @@ -114,7 +116,8 @@ fun extractDocumentFromBitmap( rotationDegrees: Int, mask: Mask?, viewModelScope: CoroutineScope, - defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO + defaultColorMode: DefaultColorMode = DefaultColorMode.AUTO, + cameraIntrinsics: CameraIntrinsics?, ): CapturedPage { val exportQuality = ExportQuality.BALANCED var colorMode = ColorMode.COLOR @@ -140,7 +143,8 @@ fun extractDocumentFromBitmap( normalizedQuad = quad.scaledTo(source.width, source.height, 1, 1) autoColorMode = autoColorMode(bgr, mask, quad) colorMode = defaultColorMode.colorMode ?: autoColorMode - page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels) + page = extractDocument(bgr, quad, rotationDegrees, colorMode, exportQuality.maxPixels, + cameraIntrinsics) } val pageJpeg = Jpeg.fromMat(page, exportQuality.jpegQuality) @@ -148,7 +152,7 @@ fun extractDocumentFromBitmap( page.release() val baseRotation = Rotation.fromDegrees(rotationDegrees) - val metadata = PageMetadata(normalizedQuad, baseRotation, autoColorMode) + val metadata = PageMetadata(normalizedQuad, baseRotation, autoColorMode, cameraIntrinsics) val sourceJpegDeferred = viewModelScope.async(Dispatchers.IO) { compressSource(source) } diff --git a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraPreview.kt b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraPreview.kt index a56a2da..4d96547 100644 --- 
a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraPreview.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraPreview.kt @@ -19,6 +19,9 @@ import android.util.Log import android.util.Size import android.view.ViewGroup.LayoutParams.MATCH_PARENT import android.widget.LinearLayout +import androidx.annotation.OptIn +import androidx.camera.camera2.interop.Camera2CameraInfo +import androidx.camera.camera2.interop.ExperimentalCamera2Interop import androidx.camera.core.CameraControl import androidx.camera.core.CameraSelector import androidx.camera.core.FocusMeteringAction @@ -65,12 +68,15 @@ import androidx.core.graphics.scale import androidx.lifecycle.LifecycleOwner import androidx.lifecycle.compose.LocalLifecycleOwner import org.fairscan.app.ui.components.CameraPermissionState +import org.fairscan.imageprocessing.CameraIntrinsics import org.fairscan.imageprocessing.Point import org.fairscan.imageprocessing.Quad +import org.fairscan.imageprocessing.cameraIntrinsics import org.fairscan.imageprocessing.scaledTo import java.util.concurrent.ExecutorService import java.util.concurrent.Executors import java.util.concurrent.TimeUnit +import kotlin.math.max @Composable fun CameraPreview( @@ -162,6 +168,7 @@ fun CameraPreview( } +@OptIn(ExperimentalCamera2Interop::class) fun bindCameraUseCases( lifecycleOwner: LifecycleOwner, cameraProvider: ProcessCameraProvider, @@ -207,6 +214,7 @@ fun bindCameraUseCases( val camera = cameraProvider.bindToLifecycle(lifecycleOwner, cameraSelector, imageAnalysis, preview, imageCapture) captureController.cameraControl = camera.cameraControl + captureController.setCameraCharacteristics(Camera2CameraInfo.from(camera.cameraInfo)) } @Composable @@ -287,21 +295,22 @@ class CameraCaptureController { var imageCapture: ImageCapture? = null private val executor = Executors.newSingleThreadExecutor() var previewView: PreviewView? = null + var cameraIntrinsics: CameraIntrinsics? 
/**
 * Reads the lens focal length and the physical sensor size of the bound camera
 * and stores them in [cameraIntrinsics].
 *
 * The stored value is null when the camera reports no focal length, reports
 * more than one focal length, or reports no physical sensor size.
 * The longer side of the sensor is the one passed on as the sensor dimension.
 */
@OptIn(ExperimentalCamera2Interop::class)
fun setCameraCharacteristics(cameraInfo: Camera2CameraInfo) {
    // Exactly one available focal length is required; anything else yields null.
    val focalLength = cameraInfo
        .getCameraCharacteristic(
            android.hardware.camera2.CameraCharacteristics.LENS_INFO_AVAILABLE_FOCAL_LENGTHS
        )
        ?.singleOrNull()
    val physicalSize = cameraInfo.getCameraCharacteristic(
        android.hardware.camera2.CameraCharacteristics.SENSOR_INFO_PHYSICAL_SIZE
    )
    cameraIntrinsics = when {
        focalLength == null || physicalSize == null -> null
        else -> cameraIntrinsics(focalLength, max(physicalSize.width, physicalSize.height))
    }
}
cameraCharacteristics) } ) } }, @@ -647,7 +648,7 @@ fun CameraScreenPreviewWithProcessedImage() { CapturedPage( debugImage("gallica.bnf.fr-bpt6k5530456s-1.jpg"), CompletableDeferred(Jpeg(ByteArray(0))), - PageMetadata(quad, R0, ColorMode.COLOR), + PageMetadata(quad, R0, ColorMode.COLOR, null), ColorMode.COLOR))) } diff --git a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt index 6ebfda0..040d555 100644 --- a/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt +++ b/app/src/main/java/org/fairscan/app/ui/screens/camera/CameraViewModel.kt @@ -34,6 +34,7 @@ import kotlinx.coroutines.withContext import org.fairscan.app.AppContainer import org.fairscan.app.domain.CapturedPage import org.fairscan.app.platform.extractDocumentFromBitmap +import org.fairscan.imageprocessing.CameraIntrinsics import org.fairscan.imageprocessing.ImageSize import org.fairscan.imageprocessing.detectDocumentQuad import java.util.concurrent.CancellationException @@ -133,12 +134,13 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { } } - fun onImageCaptured(imageProxy: ImageProxy?) { + fun onImageCaptured(imageProxy: ImageProxy?, cameraIntrinsics: CameraIntrinsics?) 
{ if (imageProxy != null) { viewModelScope.launch { try { val source = imageProxy.toBitmap() - val page = processCapturedImage(source, imageProxy.imageInfo.rotationDegrees) + val rotationDegrees = imageProxy.imageInfo.rotationDegrees + val page = processCapturedImage(source, rotationDegrees, cameraIntrinsics) imageProxy.close() onCaptureProcessed(page) } catch (e: RuntimeException) { @@ -154,6 +156,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { private suspend fun processCapturedImage( source: Bitmap, rotationDegrees: Int, + cameraIntrinsics: CameraIntrinsics?, ): CapturedPage = withContext(Dispatchers.IO) { val segmentation = imageSegmentationService.runSegmentationAndReturn(source) val mask = segmentation?.segmentation @@ -161,7 +164,7 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { val quad = mask?.let { detectDocumentQuad(mask, originalSize, isLiveAnalysis = false) } val defaultColorMode = settingsRepository.defaultColorMode.first() val result = extractDocumentFromBitmap( - source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode) + source, quad, rotationDegrees, mask, viewModelScope, defaultColorMode, cameraIntrinsics) return@withContext result } @@ -202,11 +205,9 @@ class CameraViewModel(appContainer: AppContainer): ViewModel() { try { val photoToImport = imageLoader.load(uri) ensureActive() - val page = processCapturedImage(photoToImport, 0) + val page = processCapturedImage(photoToImport, 0, null) ensureActive() - page?.let { - _events.emit(CameraEvent.ImageCaptured(it)) - } + _events.emit(CameraEvent.ImageCaptured(page)) } catch (e: CancellationException) { throw e } catch (e: Exception) { diff --git a/app/src/test/java/org/fairscan/app/data/ImageRepositoryTest.kt b/app/src/test/java/org/fairscan/app/data/ImageRepositoryTest.kt index 28ed97e..88067a9 100644 --- a/app/src/test/java/org/fairscan/app/data/ImageRepositoryTest.kt +++ b/app/src/test/java/org/fairscan/app/data/ImageRepositoryTest.kt @@ -31,6 
+31,7 @@ import org.fairscan.app.domain.Rotation.R0 import org.fairscan.app.domain.Rotation.R180 import org.fairscan.app.domain.Rotation.R270 import org.fairscan.app.domain.Rotation.R90 +import org.fairscan.imageprocessing.CameraIntrinsics import org.fairscan.imageprocessing.ColorMode import org.fairscan.imageprocessing.ColorMode.COLOR import org.fairscan.imageprocessing.ColorMode.GRAYSCALE @@ -51,7 +52,8 @@ class ImageRepositoryTest { private val testScope = TestScope() val quad1 = Quad(Point(.01, .02), Point(.1, .03), Point(.11, .12), Point(.03, .09)) - val metadata1 = PageMetadata(quad1, R90, COLOR) + val intrinsics = CameraIntrinsics(42.0f, 43.0f) + val metadata1 = PageMetadata(quad1, R90, COLOR, intrinsics) fun getFilesDir(): File { if (_filesDir == null) { @@ -63,7 +65,7 @@ class ImageRepositoryTest { fun repo( rotate: (Jpeg, Int) -> Jpeg = { input, _ -> input }, resizeToThumbnail: (Jpeg) -> Jpeg = { input -> jpeg(input.bytes[0]) }, - process: (Jpeg, Quad, Rotation, ColorMode) -> Jpeg = { _, _, _, _ -> + process: (Jpeg, PageMetadata, ColorMode) -> Jpeg = { _, _, _ -> throw UnsupportedOperationException() } ): ImageRepository { @@ -74,10 +76,9 @@ class ImageRepositoryTest { resizeToThumbnail(input) override fun process( source: Jpeg, - normalizedQuad: Quad, - baseRotation: Rotation, + metadata: PageMetadata, colorMode: ColorMode - ): Jpeg = process(source, normalizedQuad, baseRotation, colorMode) + ): Jpeg = process(source, metadata, colorMode) } return ImageRepository(getFilesDir(), transformations, testScope) @@ -244,7 +245,7 @@ class ImageRepositoryTest { fun setColorMode_should_process_and_update_metadata() = runTest { val jpeg1 = jpeg(10) val repo = repo( - process = { _, _ , _, mode -> + process = { _, _, mode -> assertThat(mode).isEqualTo(GRAYSCALE) jpeg(41) } @@ -262,7 +263,7 @@ class ImageRepositoryTest { fun setColorMode_should_not_run_twice_in_parallel() = runTest { var processCalls = 0 val repo = repo( - process = { _, _, _, _ -> + process = { _, 
/**
 * Physical camera parameters used to express the lens focal length in pixel units.
 *
 * Both values are in millimeters. Despite its name, [sensorWidth] is filled by the
 * capture code with the longer side of the sensor, and the perspective estimation
 * pairs it with the longer side of the image; keep both sides consistent when
 * calling [focalLengthInPixels].
 */
data class CameraIntrinsics(
    // in millimeters
    val focalLength: Float,
    val sensorWidth: Float,
) {
    /**
     * Focal length in pixels for an image whose matching side is
     * [imageWidthInPixels] pixels long: `focalLength / sensorWidth * imageWidthInPixels`.
     */
    fun focalLengthInPixels(imageWidthInPixels: Int) =
        focalLength / sensorWidth * imageWidthInPixels
}

/**
 * Builds [CameraIntrinsics] from optional values, or returns null when they cannot be used.
 *
 * Null is returned when either value is missing, and also when either value is NaN,
 * infinite, zero or negative. Such values would make [CameraIntrinsics.focalLengthInPixels]
 * produce Infinity, NaN or a non-positive focal length, so callers are better served by
 * falling back to their "no intrinsics available" path.
 *
 * @param focalLengthInMm lens focal length in millimeters, if known
 * @param sensorWidthInMm sensor dimension in millimeters, if known
 * @return the intrinsics, or null when they are absent or not physically meaningful
 */
fun cameraIntrinsics(focalLengthInMm: Float?, sensorWidthInMm: Float?): CameraIntrinsics? {
    if (focalLengthInMm == null || sensorWidthInMm == null)
        return null
    // Reject values that would poison the pixel focal length (division by zero, NaN, sign flip).
    val focalLengthUsable = focalLengthInMm.isFinite() && focalLengthInMm > 0f
    val sensorWidthUsable = sensorWidthInMm.isFinite() && sensorWidthInMm > 0f
    if (!focalLengthUsable || !sensorWidthUsable)
        return null
    return CameraIntrinsics(focalLengthInMm, sensorWidthInMm)
}
@@ -44,7 +59,12 @@ data class Vector3D(val x: Double, val y: Double, val z: Double) { * - https://www.robots.ox.ac.uk/~vgg/publications/1999/Criminisi99/criminisi99.pdf * - https://web.stanford.edu/class/cs231a/course_notes/02-single-view-metrology.pdf */ -fun estimateRealDimensions(quad: Quad, imageWidth: Int, imageHeight: Int): Pair { +fun estimateRealDimensions( + quad: Quad, + imageWidth: Int, + imageHeight: Int, + cameraIntrinsics: CameraIntrinsics? +): Pair { fun averageSides(): Pair { val w = (norm(quad.topLeft, quad.topRight) + norm(quad.bottomLeft, quad.bottomRight)) / 2 @@ -77,14 +97,18 @@ fun estimateRealDimensions(quad: Quad, imageWidth: Int, imageHeight: Int): Pair< val v1 = Point(v1h.x / v1h.z - cx, v1h.y / v1h.z - cy) val v2 = Point(v2h.x / v2h.z - cx, v2h.y / v2h.z - cy) - // Focal length estimated assuming zero skew and principal point at image center. - // Under these assumptions, the Image of the Absolute Conic (IAC) simplifies, - // and orthogonal directions satisfy v1 · ω · v2 = 0, - // which reduces to: f² = -(v1x·v2x + v1y·v2y) - val f2 = -(v1.x * v2.x + v1.y * v2.y) - if (f2 <= 0) - return averageSides() - val f = sqrt(f2) + val f = if (cameraIntrinsics != null) { + cameraIntrinsics.focalLengthInPixels(max(imageWidth, imageHeight)).toDouble() + } else { + // Focal length estimated assuming zero skew and principal point at image center. + // Under these assumptions, the Image of the Absolute Conic (IAC) simplifies, + // and orthogonal directions satisfy v1 · ω · v2 = 0, + // which reduces to: f² = -(v1x·v2x + v1y·v2y) + val f2 = -(v1.x * v2.x + v1.y * v2.y) + if (f2 <= 0) + return averageSides() + sqrt(f2) + } // Fall back when f is too large: document nearly fronto-parallel, // vanishing points are far away, making the focal length estimate unstable.