fix: don't fail epub analysis when optional features are missing

Refs: #1909
Gauthier Roebroeck 2025-03-11 12:39:46 +08:00
parent 1250a97d99
commit 465467c50c
6 changed files with 413 additions and 292 deletions
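In outline, the change stops building the whole EPUB manifest in a single call: BookAnalyzer now invokes each optional extraction step (TOC, landmarks, page list, divina pages, positions) separately, wraps it in its own try/catch, logs the failure, records an ERR_10xx code, and falls back to an empty result so the media status stays READY. The sketch below shows that pattern in isolation; the helper name and values are illustrative only and are not part of this commit's diff.

// Illustrative sketch, not code from this commit: per-feature error isolation.
fun <T> extractOrFallback(
  errorCode: String,
  errors: MutableList<String>,
  fallback: T,
  step: () -> T,
): T =
  try {
    step()
  } catch (e: Exception) {
    // log, record the code, and keep analyzing with a degraded result
    println("$errorCode: ${e.message}")
    errors.add(errorCode)
    fallback
  }

fun main() {
  val errors = mutableListOf<String>()
  val toc = extractOrFallback("ERR_1035", errors, emptyList<String>()) { error("broken nav document") }
  val landmarks = extractOrFallback("ERR_1036", errors, emptyList<String>()) { listOf("cover") }
  // the codes end up in the media comment; analysis still reports READY
  println(errors.joinToString(" ")) // ERR_1035
  println(toc.size to landmarks.size) // (0, 1)
}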


@@ -38,3 +38,8 @@
| ERR_1032 | EPUB file has wrong media type |
| ERR_1033 | Some entries are missing |
| ERR_1034 | An API key with that comment already exists |
| ERR_1035 | Error while getting EPUB TOC |
| ERR_1036 | Error while getting EPUB Landmarks |
| ERR_1037 | Error while getting EPUB page list |
| ERR_1038 | Error while getting EPUB divina pages |
| ERR_1039 | Error while getting EPUB positions |


@@ -827,7 +827,12 @@
"ERR_1031": "ComicRack CBL Book is missing series or number",
"ERR_1032": "EPUB file has wrong media type",
"ERR_1033": "Some entries are missing",
"ERR_1034": "An API key with that comment already exists"
"ERR_1034": "An API key with that comment already exists",
"ERR_1035": "Error while getting EPUB TOC",
"ERR_1036": "Error while getting EPUB Landmarks",
"ERR_1037": "Error while getting EPUB page list",
"ERR_1038": "Error while getting EPUB divina pages",
"ERR_1039": "Error while getting EPUB positions"
},
"filter": {
"age_rating": "age rating",


@@ -23,6 +23,7 @@ import org.gotson.komga.infrastructure.image.ImageType
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
import org.gotson.komga.infrastructure.mediacontainer.divina.DivinaExtractor
import org.gotson.komga.infrastructure.mediacontainer.epub.EpubExtractor
import org.gotson.komga.infrastructure.mediacontainer.epub.epub
import org.gotson.komga.infrastructure.mediacontainer.pdf.PdfExtractor
import org.springframework.beans.factory.annotation.Qualifier
import org.springframework.beans.factory.annotation.Value
@@ -143,29 +144,84 @@ class BookAnalyzer(
book: Book,
analyzeDimensions: Boolean,
): Media {
val manifest = epubExtractor.getManifest(book.path, analyzeDimensions)
val entriesErrorSummary =
manifest.missingResources
.map { it.fileName }
.ifEmpty { null }
?.joinToString(prefix = "ERR_1033 [", postfix = "]") { it }
return Media(
status = Media.Status.READY,
pages = manifest.divinaPages,
files = manifest.resources,
pageCount = manifest.pageCount,
epubDivinaCompatible = manifest.divinaPages.isNotEmpty(),
epubIsKepub = manifest.isKepub,
extension =
MediaExtensionEpub(
toc = manifest.toc,
landmarks = manifest.landmarks,
pageList = manifest.pageList,
isFixedLayout = manifest.isFixedLayout,
positions = manifest.positions,
),
comment = entriesErrorSummary,
)
book.path.epub { epub ->
val (resources, missingResources) = epubExtractor.getResources(epub).partition { it.fileSize != null }
val isFixedLayout = epubExtractor.isFixedLayout(epub)
val pageCount = epubExtractor.computePageCount(epub)
val isKepub = epubExtractor.isKepub(epub, resources)
val errors = mutableListOf<String>()
val toc =
try {
epubExtractor.getToc(epub)
} catch (e: Exception) {
logger.error(e) { "Error while getting EPUB TOC" }
errors.add("ERR_1035")
emptyList()
}
val landmarks =
try {
epubExtractor.getLandmarks(epub)
} catch (e: Exception) {
logger.error(e) { "Error while getting EPUB Landmarks" }
errors.add("ERR_1036")
emptyList()
}
val pageList =
try {
epubExtractor.getPageList(epub)
} catch (e: Exception) {
logger.error(e) { "Error while getting EPUB page list" }
errors.add("ERR_1037")
emptyList()
}
val divinaPages =
try {
epubExtractor.getDivinaPages(epub, isFixedLayout, pageCount, analyzeDimensions)
} catch (e: Exception) {
logger.error(e) { "Error while getting EPUB Divina pages" }
errors.add("ERR_1038")
emptyList()
}
val positions =
try {
epubExtractor.computePositions(epub, book.path, resources, isFixedLayout, isKepub)
} catch (e: Exception) {
logger.error(e) { "Error while getting EPUB positions" }
errors.add("ERR_1039")
emptyList()
}
val entriesErrorSummary =
missingResources
.map { it.fileName }
.ifEmpty { null }
?.joinToString(prefix = "ERR_1033 [", postfix = "]") { it }
val allErrors = (errors + listOfNotNull(entriesErrorSummary)).joinToString(" ").ifBlank { null }
return Media(
status = Media.Status.READY,
pages = divinaPages,
files = resources,
pageCount = pageCount,
epubDivinaCompatible = divinaPages.isNotEmpty(),
epubIsKepub = isKepub,
extension =
MediaExtensionEpub(
toc = toc,
landmarks = landmarks,
pageList = pageList,
isFixedLayout = isFixedLayout,
positions = positions,
),
comment = allErrors,
)
}
}
private fun analyzePdf(
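For reference, the comment on the resulting Media ends up as the collected error codes plus the optional ERR_1033 summary of missing entries, joined with spaces. Below is a standalone sketch of that aggregation; the file names and codes are made-up sample values, not output from the analyzer.

// Sample values only; mirrors how the comment string is assembled above.
fun main() {
  val errors = listOf("ERR_1035", "ERR_1039")
  val missingEntries = listOf("chapter3.xhtml", "images/cover.jpg")
  val entriesErrorSummary =
    missingEntries
      .ifEmpty { null }
      ?.joinToString(prefix = "ERR_1033 [", postfix = "]") { it }
  val comment = (errors + listOfNotNull(entriesErrorSummary)).joinToString(" ").ifBlank { null }
  println(comment) // ERR_1035 ERR_1039 ERR_1033 [chapter3.xhtml, images/cover.jpg]
}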


@@ -81,30 +81,7 @@ class EpubExtractor(
}
}
fun getManifest(
path: Path,
analyzeDimensions: Boolean,
): EpubManifest =
path.epub { epub ->
val (resources, missingResources) = getResources(epub).partition { it.fileSize != null }
val isFixedLayout = isFixedLayout(epub)
val pageCount = computePageCount(epub)
val isKepub = isKepub(epub, resources)
EpubManifest(
resources = resources,
missingResources = missingResources,
toc = getToc(epub),
landmarks = getLandmarks(epub),
pageList = getPageList(epub),
pageCount = pageCount,
isFixedLayout = isFixedLayout,
positions = computePositions(epub, path, resources, isFixedLayout, isKepub),
divinaPages = getDivinaPages(epub, isFixedLayout, pageCount, analyzeDimensions),
isKepub = isKepub,
)
}
private fun getResources(epub: EpubPackage): List<MediaFile> {
fun getResources(epub: EpubPackage): List<MediaFile> {
val spine =
epub.opfDoc
.select("spine > itemref")
@@ -135,7 +112,7 @@
}
}
private fun getDivinaPages(
fun getDivinaPages(
epub: EpubPackage,
isFixedLayout: Boolean,
pageCount: Int,
@@ -146,72 +123,67 @@
return emptyList()
}
try {
val pagesWithImages =
epub.opfDoc
.select("spine > itemref")
.map { it.attr("idref") }
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
.map { pagePath ->
val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
val pagesWithImages =
epub.opfDoc
.select("spine > itemref")
.map { it.attr("idref") }
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
.map { pagePath ->
val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
// if a page has text over the threshold then the book is not divina compatible
if (doc.body().text().length > letterCountThreshold) return emptyList()
// if a page has text over the threshold then the book is not divina compatible
if (doc.body().text().length > letterCountThreshold) return emptyList()
val img =
doc
.getElementsByTag("img")
.map { it.attr("src") } // get the src, which can be a relative path
val img =
doc
.getElementsByTag("img")
.map { it.attr("src") } // get the src, which can be a relative path
val svg =
doc
.select("svg > image[xlink:href]")
.map { it.attr("xlink:href") } // get the source, which can be a relative path
val svg =
doc
.select("svg > image[xlink:href]")
.map { it.attr("xlink:href") } // get the source, which can be a relative path
(img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
}
(img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
}
if (pagesWithImages.size != pageCount) {
logger.info { "Epub Divina detection failed: book has ${pagesWithImages.size} pages with images, but $pageCount total pages" }
return emptyList()
}
if (pagesWithImages.size != pageCount) {
logger.info { "Epub Divina detection failed: book has ${pagesWithImages.size} pages with images, but $pageCount total pages" }
return emptyList()
}
// Only keep unique image path for each page. KCC sometimes generates HTML pages with 5 times the same image.
val imagesPath = pagesWithImages.map { it.distinct() }.flatten()
if (imagesPath.size != pageCount) {
logger.info { "Epub Divina detection failed: book has ${imagesPath.size} detected images, but $pageCount total pages" }
return emptyList()
}
// Only keep unique image path for each page. KCC sometimes generates HTML pages with 5 times the same image.
val imagesPath = pagesWithImages.map { it.distinct() }.flatten()
if (imagesPath.size != pageCount) {
logger.info { "Epub Divina detection failed: book has ${imagesPath.size} detected images, but $pageCount total pages" }
return emptyList()
}
val divinaPages =
imagesPath.mapNotNull { imagePath ->
val mediaType =
epub.manifest.values
.firstOrNull { normalizeHref(epub.opfDir, it.href) == imagePath }
?.mediaType ?: return@mapNotNull null
val zipEntry = epub.zip.getEntry(imagePath)
if (!contentDetector.isImage(mediaType)) return@mapNotNull null
val dimension =
if (analyzeDimensions)
epub.zip.getInputStream(zipEntry).use { imageAnalyzer.getDimension(it) }
else
null
val fileSize = if (zipEntry.size == ArchiveEntry.SIZE_UNKNOWN) null else zipEntry.size
BookPage(fileName = imagePath, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
val divinaPages =
imagesPath.mapNotNull { imagePath ->
val mediaType =
epub.manifest.values
.firstOrNull { normalizeHref(epub.opfDir, it.href) == imagePath }
?.mediaType ?: return@mapNotNull null
val zipEntry = epub.zip.getEntry(imagePath)
if (!contentDetector.isImage(mediaType)) return@mapNotNull null
val dimension =
if (analyzeDimensions)
epub.zip.getInputStream(zipEntry).use { imageAnalyzer.getDimension(it) }
else
null
val fileSize = if (zipEntry.size == ArchiveEntry.SIZE_UNKNOWN) null else zipEntry.size
BookPage(fileName = imagePath, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
}
if (divinaPages.size != pageCount) {
logger.info { "Epub Divina detection failed: book has ${divinaPages.size} detected divina pages, but $pageCount total pages" }
return emptyList()
}
return divinaPages
} catch (e: Exception) {
logger.warn(e) { "Error while getting divina pages" }
if (divinaPages.size != pageCount) {
logger.info { "Epub Divina detection failed: book has ${divinaPages.size} detected divina pages, but $pageCount total pages" }
return emptyList()
}
return divinaPages
}
private fun isKepub(
fun isKepub(
epub: EpubPackage,
resources: List<MediaFile>,
): Boolean {
@@ -228,7 +200,7 @@ class EpubExtractor(
return false
}
private fun computePageCount(epub: EpubPackage): Int {
fun computePageCount(epub: EpubPackage): Int {
val spine =
epub.opfDoc
.select("spine > itemref")
@@ -241,11 +213,11 @@
.sumOf { ceil(it.compressedSize / 1024.0).toInt() }
}
private fun isFixedLayout(epub: EpubPackage) =
fun isFixedLayout(epub: EpubPackage) =
epub.opfDoc.selectFirst("metadata > *|meta[property=rendition:layout]")?.text() == "pre-paginated" ||
epub.opfDoc.selectFirst("metadata > *|meta[name=fixed-layout]")?.attr("content") == "true"
private fun computePositions(
fun computePositions(
epub: EpubPackage,
path: Path,
resources: List<MediaFile>,
@@ -346,7 +318,7 @@
}
}
private fun getToc(epub: EpubPackage): List<EpubTocEntry> {
fun getToc(epub: EpubPackage): List<EpubTocEntry> {
// Epub 3
epub.getNavResource()?.let { return processNav(it, Epub3Nav.TOC) }
// Epub 2
@@ -354,7 +326,7 @@
return emptyList()
}
private fun getPageList(epub: EpubPackage): List<EpubTocEntry> {
fun getPageList(epub: EpubPackage): List<EpubTocEntry> {
// Epub 3
epub.getNavResource()?.let { return processNav(it, Epub3Nav.PAGELIST) }
// Epub 2
@@ -362,7 +334,7 @@
return emptyList()
}
private fun getLandmarks(epub: EpubPackage): List<EpubTocEntry> {
fun getLandmarks(epub: EpubPackage): List<EpubTocEntry> {
// Epub 3
epub.getNavResource()?.let { return processNav(it, Epub3Nav.LANDMARKS) }
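The private-to-public changes above let BookAnalyzer drive the individual extraction steps and let the tests stub them one at a time on a spied bean. A minimal MockK sketch of that stubbing follows; it uses a stand-in class rather than the real EpubExtractor API and assumes only that MockK is on the classpath.

// Stand-in class for illustration; only the spyk/every-throws pattern matches
// what the test changes below rely on.
import io.mockk.every
import io.mockk.spyk

class FakeExtractor {
  fun getToc(): List<String> = listOf("Chapter 1", "Chapter 2")
}

fun main() {
  val extractor = spyk(FakeExtractor())
  every { extractor.getToc() } throws Exception("mock exception")

  // the caller degrades gracefully instead of failing the whole analysis
  val toc = try { extractor.getToc() } catch (e: Exception) { emptyList() }
  println(toc) // []
}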


@@ -1,19 +0,0 @@
package org.gotson.komga.infrastructure.mediacontainer.epub
import org.gotson.komga.domain.model.BookPage
import org.gotson.komga.domain.model.EpubTocEntry
import org.gotson.komga.domain.model.MediaFile
import org.gotson.komga.domain.model.R2Locator
data class EpubManifest(
val resources: List<MediaFile>,
val missingResources: List<MediaFile>,
val toc: List<EpubTocEntry>,
val landmarks: List<EpubTocEntry>,
val pageList: List<EpubTocEntry>,
val pageCount: Int,
val isFixedLayout: Boolean,
val positions: List<R2Locator>,
val divinaPages: List<BookPage>,
val isKepub: Boolean,
)


@@ -1,6 +1,7 @@
package org.gotson.komga.domain.service
import com.ninjasquad.springmockk.SpykBean
import io.mockk.clearAllMocks
import io.mockk.every
import io.mockk.verify
import org.assertj.core.api.Assertions.assertThat
@@ -8,8 +9,12 @@ import org.gotson.komga.domain.model.Book
import org.gotson.komga.domain.model.BookPage
import org.gotson.komga.domain.model.BookWithMedia
import org.gotson.komga.domain.model.Media
import org.gotson.komga.domain.model.MediaExtensionEpub
import org.gotson.komga.domain.model.makeBook
import org.gotson.komga.infrastructure.configuration.KomgaProperties
import org.gotson.komga.infrastructure.mediacontainer.epub.EpubExtractor
import org.junit.jupiter.api.AfterEach
import org.junit.jupiter.api.Nested
import org.junit.jupiter.api.Test
import org.junit.jupiter.params.ParameterizedTest
import org.junit.jupiter.params.provider.MethodSource
@@ -32,194 +37,291 @@ class BookAnalyzerTest(
@SpykBean
private lateinit var bookAnalyzer: BookAnalyzer
@Test
fun `given rar4 archive when analyzing then media status is READY`() {
val file = ClassPathResource("archives/rar4.rar")
val book = Book("book", file.url, LocalDateTime.now())
@SpykBean
private lateinit var epubExtractor: EpubExtractor
val media = bookAnalyzer.analyze(book, false)
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
assertThat(media.status).isEqualTo(Media.Status.READY)
assertThat(media.pages).hasSize(3)
@AfterEach
fun afterEach() {
clearAllMocks()
}
@ParameterizedTest
@ValueSource(
strings = [
"rar4-solid.rar", "rar4-encrypted.rar",
],
)
fun `given rar4 solid or encrypted archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
val file = ClassPathResource("archives/rar4-solid.rar")
val book = Book("book", file.url, LocalDateTime.now())
@Nested
inner class ArchiveFormats {
@Test
fun `given rar4 archive when analyzing then media status is READY`() {
val file = ClassPathResource("archives/rar4.rar")
val book = Book("book", file.url, LocalDateTime.now())
val media = bookAnalyzer.analyze(book, false)
val media = bookAnalyzer.analyze(book, false)
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
assertThat(media.status).isEqualTo(Media.Status.READY)
assertThat(media.pages).hasSize(3)
}
@ParameterizedTest
@ValueSource(
strings = [
"rar4-solid.rar", "rar4-encrypted.rar",
],
)
fun `given rar4 solid or encrypted archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
val file = ClassPathResource("archives/$fileName")
val book = Book("book", file.url, LocalDateTime.now())
val media = bookAnalyzer.analyze(book, false)
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
}
@ParameterizedTest
@ValueSource(
strings = [
"rar5.rar", "rar5-solid.rar", "rar5-encrypted.rar",
],
)
fun `given rar5 archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
val file = ClassPathResource("archives/$fileName")
val book = Book("book", file.url, LocalDateTime.now())
val media = bookAnalyzer.analyze(book, false)
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=5")
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
}
@ParameterizedTest
@ValueSource(
strings = [
"7zip.7z", "7zip-encrypted.7z",
],
)
fun `given 7zip archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
val file = ClassPathResource("archives/$fileName")
val book = Book("book", file.url, LocalDateTime.now())
val media = bookAnalyzer.analyze(book, false)
assertThat(media.mediaType).isEqualTo("application/x-7z-compressed")
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
}
@ParameterizedTest
@ValueSource(
strings = [
"zip.zip", "zip-bzip2.zip", "zip-copy.zip", "zip-deflate64.zip", "zip-lzma.zip", "zip-ppmd.zip",
],
)
fun `given zip archive when analyzing then media status is READY`(fileName: String) {
val file = ClassPathResource("archives/$fileName")
val book = Book("book", file.url, LocalDateTime.now())
val media = bookAnalyzer.analyze(book, false)
assertThat(media.mediaType).isEqualTo("application/zip")
assertThat(media.status).isEqualTo(Media.Status.READY)
assertThat(media.pages).hasSize(1)
}
@Test
fun `given zip encrypted archive when analyzing then media status is ERROR`() {
val file = ClassPathResource("archives/zip-encrypted.zip")
val book = Book("book", file.url, LocalDateTime.now())
val media = bookAnalyzer.analyze(book, false)
assertThat(media.mediaType).isEqualTo("application/zip")
assertThat(media.status).isEqualTo(Media.Status.ERROR)
}
@Test
fun `given epub archive when analyzing then media status is READY`() {
val file = ClassPathResource("archives/epub3.epub")
val book = Book("book", file.url, LocalDateTime.now())
val media = bookAnalyzer.analyze(book, false)
assertThat(media.mediaType).isEqualTo("application/epub+zip")
assertThat(media.status).isEqualTo(Media.Status.READY)
assertThat(media.pages).hasSize(0)
}
}
@ParameterizedTest
@ValueSource(
strings = [
"rar5.rar", "rar5-solid.rar", "rar5-encrypted.rar",
],
)
fun `given rar5 archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
val file = ClassPathResource("archives/$fileName")
val book = Book("book", file.url, LocalDateTime.now())
@Nested
inner class Epub {
@Test
fun `given broken epub archive when analyzing then media status is ERROR`() {
val file = ClassPathResource("archives/zip-as-epub.epub")
val book = Book("book", file.url, LocalDateTime.now())
val media = bookAnalyzer.analyze(book, false)
val media = bookAnalyzer.analyze(book, false)
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=5")
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
assertThat(media.mediaType).isEqualTo("application/zip")
assertThat(media.status).isEqualTo(Media.Status.ERROR)
assertThat(media.pages).hasSize(0)
}
@Test
fun `given epub archive when toc cannot be extracted then media status is READY with comments`() {
val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
val book = Book("book", file.url, LocalDateTime.now())
every { epubExtractor.getToc(any()) } throws Exception("mock exception")
val media = bookAnalyzer.analyze(book, false)
val extension = media.extension as? MediaExtensionEpub
assertThat(media.mediaType).isEqualTo("application/epub+zip")
assertThat(media.status).isEqualTo(Media.Status.READY)
assertThat(media.comment).contains("ERR_1035")
assertThat(extension).isNotNull
assertThat(extension!!.toc).isEmpty()
}
@Test
fun `given epub archive when landmarks cannot be extracted then media status is READY with comments`() {
val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
val book = Book("book", file.url, LocalDateTime.now())
every { epubExtractor.getLandmarks(any()) } throws Exception("mock exception")
val media = bookAnalyzer.analyze(book, false)
val extension = media.extension as? MediaExtensionEpub
assertThat(media.mediaType).isEqualTo("application/epub+zip")
assertThat(media.status).isEqualTo(Media.Status.READY)
assertThat(media.comment).contains("ERR_1036")
assertThat(extension).isNotNull
assertThat(extension!!.landmarks).isEmpty()
}
@Test
fun `given epub archive when page list cannot be extracted then media status is READY with comments`() {
val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
val book = Book("book", file.url, LocalDateTime.now())
every { epubExtractor.getPageList(any()) } throws Exception("mock exception")
val media = bookAnalyzer.analyze(book, false)
val extension = media.extension as? MediaExtensionEpub
assertThat(media.mediaType).isEqualTo("application/epub+zip")
assertThat(media.status).isEqualTo(Media.Status.READY)
assertThat(media.comment).contains("ERR_1037")
assertThat(extension).isNotNull
assertThat(extension!!.pageList).isEmpty()
}
@Test
fun `given epub archive when divina pages cannot be extracted then media status is READY with comments`() {
val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
val book = Book("book", file.url, LocalDateTime.now())
every { epubExtractor.getDivinaPages(any(), any(), any(), any()) } throws Exception("mock exception")
val media = bookAnalyzer.analyze(book, false)
assertThat(media.mediaType).isEqualTo("application/epub+zip")
assertThat(media.status).isEqualTo(Media.Status.READY)
assertThat(media.comment).contains("ERR_1038")
assertThat(media.pages).isEmpty()
}
@Test
fun `given epub archive when positions cannot be extracted then media status is READY with comments`() {
val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
val book = Book("book", file.url, LocalDateTime.now())
every { epubExtractor.computePositions(any(), any(), any(), any(), any()) } throws Exception("mock exception")
val media = bookAnalyzer.analyze(book, false)
val extension = media.extension as? MediaExtensionEpub
assertThat(media.mediaType).isEqualTo("application/epub+zip")
assertThat(media.status).isEqualTo(Media.Status.READY)
assertThat(media.comment).contains("ERR_1039")
assertThat(extension).isNotNull
assertThat(extension!!.positions).isEmpty()
}
}
@ParameterizedTest
@ValueSource(
strings = [
"7zip.7z", "7zip-encrypted.7z",
],
)
fun `given 7zip archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
val file = ClassPathResource("archives/$fileName")
val book = Book("book", file.url, LocalDateTime.now())
@Nested
inner class PageHashing {
@Test
fun `given book with a single page when hashing then all pages are hashed`() {
val book = makeBook("book1")
val pages = listOf(BookPage("1.jpeg", "image/jpeg"))
val media = Media(Media.Status.READY, pages = pages)
val media = bookAnalyzer.analyze(book, false)
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
assertThat(media.mediaType).isEqualTo("application/x-7z-compressed")
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
}
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
@ParameterizedTest
@ValueSource(
strings = [
"zip.zip", "zip-bzip2.zip", "zip-copy.zip", "zip-deflate64.zip", "zip-lzma.zip", "zip-ppmd.zip",
],
)
fun `given zip archive when analyzing then media status is READY`(fileName: String) {
val file = ClassPathResource("archives/$fileName")
val book = Book("book", file.url, LocalDateTime.now())
assertThat(hashedMedia.pages).hasSize(1)
assertThat(hashedMedia.pages.first().fileHash).isEqualTo("hashed")
}
val media = bookAnalyzer.analyze(book, false)
@Test
fun `given book with more than 6 pages when hashing then only first and last 3 are hashed`() {
val book = makeBook("book1")
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg") }
val media = Media(Media.Status.READY, pages = pages)
assertThat(media.mediaType).isEqualTo("application/zip")
assertThat(media.status).isEqualTo(Media.Status.READY)
assertThat(media.pages).hasSize(1)
}
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
@Test
fun `given zip encrypted archive when analyzing then media status is ERROR`() {
val file = ClassPathResource("archives/zip-encrypted.zip")
val book = Book("book", file.url, LocalDateTime.now())
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
val media = bookAnalyzer.analyze(book, false)
assertThat(hashedMedia.pages).hasSize(30)
assertThat(hashedMedia.pages.take(komgaProperties.pageHashing).map { it.fileHash })
.hasSize(komgaProperties.pageHashing)
.containsOnly("hashed")
assertThat(hashedMedia.pages.takeLast(komgaProperties.pageHashing).map { it.fileHash })
.hasSize(komgaProperties.pageHashing)
.containsOnly("hashed")
assertThat(
hashedMedia.pages
.drop(komgaProperties.pageHashing)
.dropLast(komgaProperties.pageHashing)
.map { it.fileHash },
).hasSize(30 - (komgaProperties.pageHashing * 2))
.containsOnly("")
}
assertThat(media.mediaType).isEqualTo("application/zip")
assertThat(media.status).isEqualTo(Media.Status.ERROR)
}
@Test
fun `given book with already hashed pages when hashing then no hashing is done`() {
val book = makeBook("book1")
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg", fileHash = "hashed") }
val media = Media(Media.Status.READY, pages = pages)
@Test
fun `given epub archive when analyzing then media status is READY`() {
val file = ClassPathResource("archives/epub3.epub")
val book = Book("book", file.url, LocalDateTime.now())
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
val media = bookAnalyzer.analyze(book, false)
verify(exactly = 0) { bookAnalyzer.getPageContent(any(), any()) }
verify(exactly = 0) { bookAnalyzer.hashPage(any(), any()) }
assertThat(media.mediaType).isEqualTo("application/epub+zip")
assertThat(media.status).isEqualTo(Media.Status.READY)
assertThat(media.pages).hasSize(0)
}
assertThat(hashedMedia.pages.map { it.fileHash })
.hasSize(30)
.containsOnly("hashed")
}
@Test
fun `given broken epub archive when analyzing then media status is ERROR`() {
val file = ClassPathResource("archives/zip-as-epub.epub")
val book = Book("book", file.url, LocalDateTime.now())
@ParameterizedTest
@MethodSource("provideDirectoriesForPageHashing")
fun `given 2 exact pages when hashing then hashes are the same`(directory: Path) {
val files = directory.listDirectoryEntries()
assertThat(files).hasSize(2)
val media = bookAnalyzer.analyze(book, false)
val mediaType = "image/${directory.fileName.extension}"
assertThat(media.mediaType).isEqualTo("application/zip")
assertThat(media.status).isEqualTo(Media.Status.ERROR)
assertThat(media.pages).hasSize(0)
}
val hashes =
files.map {
bookAnalyzer.hashPage(BookPage(it.name, mediaType = mediaType), it.inputStream().readBytes())
}
@Test
fun `given book with a single page when hashing then all pages are hashed`() {
val book = makeBook("book1")
val pages = listOf(BookPage("1.jpeg", "image/jpeg"))
val media = Media(Media.Status.READY, pages = pages)
assertThat(hashes.first()).isEqualTo(hashes.last())
}
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
assertThat(hashedMedia.pages).hasSize(1)
assertThat(hashedMedia.pages.first().fileHash).isEqualTo("hashed")
}
@Test
fun `given book with more than 6 pages when hashing then only first and last 3 are hashed`() {
val book = makeBook("book1")
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg") }
val media = Media(Media.Status.READY, pages = pages)
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
assertThat(hashedMedia.pages).hasSize(30)
assertThat(hashedMedia.pages.take(komgaProperties.pageHashing).map { it.fileHash })
.hasSize(komgaProperties.pageHashing)
.containsOnly("hashed")
assertThat(hashedMedia.pages.takeLast(komgaProperties.pageHashing).map { it.fileHash })
.hasSize(komgaProperties.pageHashing)
.containsOnly("hashed")
assertThat(
hashedMedia.pages
.drop(komgaProperties.pageHashing)
.dropLast(komgaProperties.pageHashing)
.map { it.fileHash },
).hasSize(30 - (komgaProperties.pageHashing * 2))
.containsOnly("")
}
@Test
fun `given book with already hashed pages when hashing then no hashing is done`() {
val book = makeBook("book1")
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg", fileHash = "hashed") }
val media = Media(Media.Status.READY, pages = pages)
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
verify(exactly = 0) { bookAnalyzer.getPageContent(any(), any()) }
verify(exactly = 0) { bookAnalyzer.hashPage(any(), any()) }
assertThat(hashedMedia.pages.map { it.fileHash })
.hasSize(30)
.containsOnly("hashed")
}
@ParameterizedTest
@MethodSource("provideDirectoriesForPageHashing")
fun `given 2 exact pages when hashing then hashes are the same`(directory: Path) {
val files = directory.listDirectoryEntries()
assertThat(files).hasSize(2)
val mediaType = "image/${directory.fileName.extension}"
val hashes =
files.map {
bookAnalyzer.hashPage(BookPage(it.name, mediaType = mediaType), it.inputStream().readBytes())
}
assertThat(hashes.first()).isEqualTo(hashes.last())
}
companion object {
@JvmStatic
fun provideDirectoriesForPageHashing() = ClassPathResource("hashpage").uri.toPath().listDirectoryEntries()
private fun provideDirectoriesForPageHashing() = ClassPathResource("hashpage").uri.toPath().listDirectoryEntries()
}
}