mirror of
https://github.com/gotson/komga.git
synced 2025-04-03 23:21:22 +08:00
fix: don't fail epub analysis when optional features are missing
Refs: #1909
This commit is contained in:
parent
1250a97d99
commit
465467c50c
@ -38,3 +38,8 @@
|
||||
| ERR_1032 | EPUB file has wrong media type |
|
||||
| ERR_1033 | Some entries are missing |
|
||||
| ERR_1034 | An API key with that comment already exists |
|
||||
| ERR_1035 | Error while getting EPUB TOC |
|
||||
| ERR_1036 | Error while getting EPUB Landmarks |
|
||||
| ERR_1037 | Error while getting EPUB page list |
|
||||
| ERR_1038 | Error while getting EPUB divina pages |
|
||||
| ERR_1039 | Error while getting EPUB positions |
|
||||
|
@ -827,7 +827,12 @@
|
||||
"ERR_1031": "ComicRack CBL Book is missing series or number",
|
||||
"ERR_1032": "EPUB file has wrong media type",
|
||||
"ERR_1033": "Some entries are missing",
|
||||
"ERR_1034": "An API key with that comment already exists"
|
||||
"ERR_1034": "An API key with that comment already exists",
|
||||
"ERR_1035": "Error while getting EPUB TOC",
|
||||
"ERR_1036": "Error while getting EPUB Landmarks",
|
||||
"ERR_1037": "Error while getting EPUB page list",
|
||||
"ERR_1038": "Error while getting EPUB divina pages",
|
||||
"ERR_1039": "Error while getting EPUB positions"
|
||||
},
|
||||
"filter": {
|
||||
"age_rating": "age rating",
|
||||
|
@ -23,6 +23,7 @@ import org.gotson.komga.infrastructure.image.ImageType
|
||||
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
|
||||
import org.gotson.komga.infrastructure.mediacontainer.divina.DivinaExtractor
|
||||
import org.gotson.komga.infrastructure.mediacontainer.epub.EpubExtractor
|
||||
import org.gotson.komga.infrastructure.mediacontainer.epub.epub
|
||||
import org.gotson.komga.infrastructure.mediacontainer.pdf.PdfExtractor
|
||||
import org.springframework.beans.factory.annotation.Qualifier
|
||||
import org.springframework.beans.factory.annotation.Value
|
||||
@ -143,29 +144,84 @@ class BookAnalyzer(
|
||||
book: Book,
|
||||
analyzeDimensions: Boolean,
|
||||
): Media {
|
||||
val manifest = epubExtractor.getManifest(book.path, analyzeDimensions)
|
||||
val entriesErrorSummary =
|
||||
manifest.missingResources
|
||||
.map { it.fileName }
|
||||
.ifEmpty { null }
|
||||
?.joinToString(prefix = "ERR_1033 [", postfix = "]") { it }
|
||||
return Media(
|
||||
status = Media.Status.READY,
|
||||
pages = manifest.divinaPages,
|
||||
files = manifest.resources,
|
||||
pageCount = manifest.pageCount,
|
||||
epubDivinaCompatible = manifest.divinaPages.isNotEmpty(),
|
||||
epubIsKepub = manifest.isKepub,
|
||||
extension =
|
||||
MediaExtensionEpub(
|
||||
toc = manifest.toc,
|
||||
landmarks = manifest.landmarks,
|
||||
pageList = manifest.pageList,
|
||||
isFixedLayout = manifest.isFixedLayout,
|
||||
positions = manifest.positions,
|
||||
),
|
||||
comment = entriesErrorSummary,
|
||||
)
|
||||
book.path.epub { epub ->
|
||||
val (resources, missingResources) = epubExtractor.getResources(epub).partition { it.fileSize != null }
|
||||
val isFixedLayout = epubExtractor.isFixedLayout(epub)
|
||||
val pageCount = epubExtractor.computePageCount(epub)
|
||||
val isKepub = epubExtractor.isKepub(epub, resources)
|
||||
|
||||
val errors = mutableListOf<String>()
|
||||
|
||||
val toc =
|
||||
try {
|
||||
epubExtractor.getToc(epub)
|
||||
} catch (e: Exception) {
|
||||
logger.error(e) { "Error while getting EPUB TOC" }
|
||||
errors.add("ERR_1035")
|
||||
emptyList()
|
||||
}
|
||||
|
||||
val landmarks =
|
||||
try {
|
||||
epubExtractor.getLandmarks(epub)
|
||||
} catch (e: Exception) {
|
||||
logger.error(e) { "Error while getting EPUB Landmarks" }
|
||||
errors.add("ERR_1036")
|
||||
emptyList()
|
||||
}
|
||||
|
||||
val pageList =
|
||||
try {
|
||||
epubExtractor.getPageList(epub)
|
||||
} catch (e: Exception) {
|
||||
logger.error(e) { "Error while getting EPUB page list" }
|
||||
errors.add("ERR_1037")
|
||||
emptyList()
|
||||
}
|
||||
|
||||
val divinaPages =
|
||||
try {
|
||||
epubExtractor.getDivinaPages(epub, isFixedLayout, pageCount, analyzeDimensions)
|
||||
} catch (e: Exception) {
|
||||
logger.error(e) { "Error while getting EPUB Divina pages" }
|
||||
errors.add("ERR_1038")
|
||||
emptyList()
|
||||
}
|
||||
|
||||
val positions =
|
||||
try {
|
||||
epubExtractor.computePositions(epub, book.path, resources, isFixedLayout, isKepub)
|
||||
} catch (e: Exception) {
|
||||
logger.error(e) { "Error while getting EPUB positions" }
|
||||
errors.add("ERR_1039")
|
||||
emptyList()
|
||||
}
|
||||
|
||||
val entriesErrorSummary =
|
||||
missingResources
|
||||
.map { it.fileName }
|
||||
.ifEmpty { null }
|
||||
?.joinToString(prefix = "ERR_1033 [", postfix = "]") { it }
|
||||
// entriesErrorSummary is null when no entry is missing. Appending it directly to the list would make
// joinToString render it as the literal "null", so it is dropped via listOfNotNull instead.
// The comment stays null (rather than an empty string) when no error was collected at all.
val allErrors = (errors + listOfNotNull(entriesErrorSummary)).joinToString(" ").ifEmpty { null }
|
||||
|
||||
return Media(
|
||||
status = Media.Status.READY,
|
||||
pages = divinaPages,
|
||||
files = resources,
|
||||
pageCount = pageCount,
|
||||
epubDivinaCompatible = divinaPages.isNotEmpty(),
|
||||
epubIsKepub = isKepub,
|
||||
extension =
|
||||
MediaExtensionEpub(
|
||||
toc = toc,
|
||||
landmarks = landmarks,
|
||||
pageList = pageList,
|
||||
isFixedLayout = isFixedLayout,
|
||||
positions = positions,
|
||||
),
|
||||
comment = allErrors,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
private fun analyzePdf(
|
||||
|
@ -81,30 +81,7 @@ class EpubExtractor(
|
||||
}
|
||||
}
|
||||
|
||||
fun getManifest(
|
||||
path: Path,
|
||||
analyzeDimensions: Boolean,
|
||||
): EpubManifest =
|
||||
path.epub { epub ->
|
||||
val (resources, missingResources) = getResources(epub).partition { it.fileSize != null }
|
||||
val isFixedLayout = isFixedLayout(epub)
|
||||
val pageCount = computePageCount(epub)
|
||||
val isKepub = isKepub(epub, resources)
|
||||
EpubManifest(
|
||||
resources = resources,
|
||||
missingResources = missingResources,
|
||||
toc = getToc(epub),
|
||||
landmarks = getLandmarks(epub),
|
||||
pageList = getPageList(epub),
|
||||
pageCount = pageCount,
|
||||
isFixedLayout = isFixedLayout,
|
||||
positions = computePositions(epub, path, resources, isFixedLayout, isKepub),
|
||||
divinaPages = getDivinaPages(epub, isFixedLayout, pageCount, analyzeDimensions),
|
||||
isKepub = isKepub,
|
||||
)
|
||||
}
|
||||
|
||||
private fun getResources(epub: EpubPackage): List<MediaFile> {
|
||||
fun getResources(epub: EpubPackage): List<MediaFile> {
|
||||
val spine =
|
||||
epub.opfDoc
|
||||
.select("spine > itemref")
|
||||
@ -135,7 +112,7 @@ class EpubExtractor(
|
||||
}
|
||||
}
|
||||
|
||||
private fun getDivinaPages(
|
||||
fun getDivinaPages(
|
||||
epub: EpubPackage,
|
||||
isFixedLayout: Boolean,
|
||||
pageCount: Int,
|
||||
@ -146,72 +123,67 @@ class EpubExtractor(
|
||||
return emptyList()
|
||||
}
|
||||
|
||||
try {
|
||||
val pagesWithImages =
|
||||
epub.opfDoc
|
||||
.select("spine > itemref")
|
||||
.map { it.attr("idref") }
|
||||
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
|
||||
.map { pagePath ->
|
||||
val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
|
||||
val pagesWithImages =
|
||||
epub.opfDoc
|
||||
.select("spine > itemref")
|
||||
.map { it.attr("idref") }
|
||||
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
|
||||
.map { pagePath ->
|
||||
val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
|
||||
|
||||
// if a page has text over the threshold then the book is not divina compatible
|
||||
if (doc.body().text().length > letterCountThreshold) return emptyList()
|
||||
// if a page has text over the threshold then the book is not divina compatible
|
||||
if (doc.body().text().length > letterCountThreshold) return emptyList()
|
||||
|
||||
val img =
|
||||
doc
|
||||
.getElementsByTag("img")
|
||||
.map { it.attr("src") } // get the src, which can be a relative path
|
||||
val img =
|
||||
doc
|
||||
.getElementsByTag("img")
|
||||
.map { it.attr("src") } // get the src, which can be a relative path
|
||||
|
||||
val svg =
|
||||
doc
|
||||
.select("svg > image[xlink:href]")
|
||||
.map { it.attr("xlink:href") } // get the source, which can be a relative path
|
||||
val svg =
|
||||
doc
|
||||
.select("svg > image[xlink:href]")
|
||||
.map { it.attr("xlink:href") } // get the source, which can be a relative path
|
||||
|
||||
(img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
|
||||
}
|
||||
|
||||
if (pagesWithImages.size != pageCount) {
|
||||
logger.info { "Epub Divina detection failed: book has ${pagesWithImages.size} pages with images, but $pageCount total pages" }
|
||||
return emptyList()
|
||||
}
|
||||
// Only keep unique image path for each page. KCC sometimes generates HTML pages with 5 times the same image.
|
||||
val imagesPath = pagesWithImages.map { it.distinct() }.flatten()
|
||||
if (imagesPath.size != pageCount) {
|
||||
logger.info { "Epub Divina detection failed: book has ${imagesPath.size} detected images, but $pageCount total pages" }
|
||||
return emptyList()
|
||||
}
|
||||
|
||||
val divinaPages =
|
||||
imagesPath.mapNotNull { imagePath ->
|
||||
val mediaType =
|
||||
epub.manifest.values
|
||||
.firstOrNull { normalizeHref(epub.opfDir, it.href) == imagePath }
|
||||
?.mediaType ?: return@mapNotNull null
|
||||
val zipEntry = epub.zip.getEntry(imagePath)
|
||||
if (!contentDetector.isImage(mediaType)) return@mapNotNull null
|
||||
|
||||
val dimension =
|
||||
if (analyzeDimensions)
|
||||
epub.zip.getInputStream(zipEntry).use { imageAnalyzer.getDimension(it) }
|
||||
else
|
||||
null
|
||||
val fileSize = if (zipEntry.size == ArchiveEntry.SIZE_UNKNOWN) null else zipEntry.size
|
||||
BookPage(fileName = imagePath, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
|
||||
(img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
|
||||
}
|
||||
|
||||
if (divinaPages.size != pageCount) {
|
||||
logger.info { "Epub Divina detection failed: book has ${divinaPages.size} detected divina pages, but $pageCount total pages" }
|
||||
return emptyList()
|
||||
}
|
||||
return divinaPages
|
||||
} catch (e: Exception) {
|
||||
logger.warn(e) { "Error while getting divina pages" }
|
||||
if (pagesWithImages.size != pageCount) {
|
||||
logger.info { "Epub Divina detection failed: book has ${pagesWithImages.size} pages with images, but $pageCount total pages" }
|
||||
return emptyList()
|
||||
}
|
||||
// Only keep unique image path for each page. KCC sometimes generates HTML pages with 5 times the same image.
|
||||
val imagesPath = pagesWithImages.map { it.distinct() }.flatten()
|
||||
if (imagesPath.size != pageCount) {
|
||||
logger.info { "Epub Divina detection failed: book has ${imagesPath.size} detected images, but $pageCount total pages" }
|
||||
return emptyList()
|
||||
}
|
||||
|
||||
val divinaPages =
|
||||
imagesPath.mapNotNull { imagePath ->
|
||||
val mediaType =
|
||||
epub.manifest.values
|
||||
.firstOrNull { normalizeHref(epub.opfDir, it.href) == imagePath }
|
||||
?.mediaType ?: return@mapNotNull null
|
||||
val zipEntry = epub.zip.getEntry(imagePath)
|
||||
if (!contentDetector.isImage(mediaType)) return@mapNotNull null
|
||||
|
||||
val dimension =
|
||||
if (analyzeDimensions)
|
||||
epub.zip.getInputStream(zipEntry).use { imageAnalyzer.getDimension(it) }
|
||||
else
|
||||
null
|
||||
val fileSize = if (zipEntry.size == ArchiveEntry.SIZE_UNKNOWN) null else zipEntry.size
|
||||
BookPage(fileName = imagePath, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
|
||||
}
|
||||
|
||||
if (divinaPages.size != pageCount) {
|
||||
logger.info { "Epub Divina detection failed: book has ${divinaPages.size} detected divina pages, but $pageCount total pages" }
|
||||
return emptyList()
|
||||
}
|
||||
return divinaPages
|
||||
}
|
||||
|
||||
private fun isKepub(
|
||||
fun isKepub(
|
||||
epub: EpubPackage,
|
||||
resources: List<MediaFile>,
|
||||
): Boolean {
|
||||
@ -228,7 +200,7 @@ class EpubExtractor(
|
||||
return false
|
||||
}
|
||||
|
||||
private fun computePageCount(epub: EpubPackage): Int {
|
||||
fun computePageCount(epub: EpubPackage): Int {
|
||||
val spine =
|
||||
epub.opfDoc
|
||||
.select("spine > itemref")
|
||||
@ -241,11 +213,11 @@ class EpubExtractor(
|
||||
.sumOf { ceil(it.compressedSize / 1024.0).toInt() }
|
||||
}
|
||||
|
||||
private fun isFixedLayout(epub: EpubPackage) =
|
||||
fun isFixedLayout(epub: EpubPackage) =
|
||||
epub.opfDoc.selectFirst("metadata > *|meta[property=rendition:layout]")?.text() == "pre-paginated" ||
|
||||
epub.opfDoc.selectFirst("metadata > *|meta[name=fixed-layout]")?.attr("content") == "true"
|
||||
|
||||
private fun computePositions(
|
||||
fun computePositions(
|
||||
epub: EpubPackage,
|
||||
path: Path,
|
||||
resources: List<MediaFile>,
|
||||
@ -346,7 +318,7 @@ class EpubExtractor(
|
||||
}
|
||||
}
|
||||
|
||||
private fun getToc(epub: EpubPackage): List<EpubTocEntry> {
|
||||
fun getToc(epub: EpubPackage): List<EpubTocEntry> {
|
||||
// Epub 3
|
||||
epub.getNavResource()?.let { return processNav(it, Epub3Nav.TOC) }
|
||||
// Epub 2
|
||||
@ -354,7 +326,7 @@ class EpubExtractor(
|
||||
return emptyList()
|
||||
}
|
||||
|
||||
private fun getPageList(epub: EpubPackage): List<EpubTocEntry> {
|
||||
fun getPageList(epub: EpubPackage): List<EpubTocEntry> {
|
||||
// Epub 3
|
||||
epub.getNavResource()?.let { return processNav(it, Epub3Nav.PAGELIST) }
|
||||
// Epub 2
|
||||
@ -362,7 +334,7 @@ class EpubExtractor(
|
||||
return emptyList()
|
||||
}
|
||||
|
||||
private fun getLandmarks(epub: EpubPackage): List<EpubTocEntry> {
|
||||
fun getLandmarks(epub: EpubPackage): List<EpubTocEntry> {
|
||||
// Epub 3
|
||||
epub.getNavResource()?.let { return processNav(it, Epub3Nav.LANDMARKS) }
|
||||
|
||||
|
@ -1,19 +0,0 @@
|
||||
package org.gotson.komga.infrastructure.mediacontainer.epub
|
||||
|
||||
import org.gotson.komga.domain.model.BookPage
|
||||
import org.gotson.komga.domain.model.EpubTocEntry
|
||||
import org.gotson.komga.domain.model.MediaFile
|
||||
import org.gotson.komga.domain.model.R2Locator
|
||||
|
||||
data class EpubManifest(
|
||||
val resources: List<MediaFile>,
|
||||
val missingResources: List<MediaFile>,
|
||||
val toc: List<EpubTocEntry>,
|
||||
val landmarks: List<EpubTocEntry>,
|
||||
val pageList: List<EpubTocEntry>,
|
||||
val pageCount: Int,
|
||||
val isFixedLayout: Boolean,
|
||||
val positions: List<R2Locator>,
|
||||
val divinaPages: List<BookPage>,
|
||||
val isKepub: Boolean,
|
||||
)
|
@ -1,6 +1,7 @@
|
||||
package org.gotson.komga.domain.service
|
||||
|
||||
import com.ninjasquad.springmockk.SpykBean
|
||||
import io.mockk.clearAllMocks
|
||||
import io.mockk.every
|
||||
import io.mockk.verify
|
||||
import org.assertj.core.api.Assertions.assertThat
|
||||
@ -8,8 +9,12 @@ import org.gotson.komga.domain.model.Book
|
||||
import org.gotson.komga.domain.model.BookPage
|
||||
import org.gotson.komga.domain.model.BookWithMedia
|
||||
import org.gotson.komga.domain.model.Media
|
||||
import org.gotson.komga.domain.model.MediaExtensionEpub
|
||||
import org.gotson.komga.domain.model.makeBook
|
||||
import org.gotson.komga.infrastructure.configuration.KomgaProperties
|
||||
import org.gotson.komga.infrastructure.mediacontainer.epub.EpubExtractor
|
||||
import org.junit.jupiter.api.AfterEach
|
||||
import org.junit.jupiter.api.Nested
|
||||
import org.junit.jupiter.api.Test
|
||||
import org.junit.jupiter.params.ParameterizedTest
|
||||
import org.junit.jupiter.params.provider.MethodSource
|
||||
@ -32,194 +37,291 @@ class BookAnalyzerTest(
|
||||
@SpykBean
|
||||
private lateinit var bookAnalyzer: BookAnalyzer
|
||||
|
||||
@Test
|
||||
fun `given rar4 archive when analyzing then media status is READY`() {
|
||||
val file = ClassPathResource("archives/rar4.rar")
|
||||
val book = Book("book", file.url, LocalDateTime.now())
|
||||
@SpykBean
|
||||
private lateinit var epubExtractor: EpubExtractor
|
||||
|
||||
val media = bookAnalyzer.analyze(book, false)
|
||||
|
||||
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
|
||||
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||
assertThat(media.pages).hasSize(3)
|
||||
@AfterEach
|
||||
fun afterEach() {
|
||||
clearAllMocks()
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(
|
||||
strings = [
|
||||
"rar4-solid.rar", "rar4-encrypted.rar",
|
||||
],
|
||||
)
|
||||
fun `given rar4 solid or encrypted archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
|
||||
val file = ClassPathResource("archives/rar4-solid.rar")
|
||||
val book = Book("book", file.url, LocalDateTime.now())
|
||||
@Nested
|
||||
inner class ArchiveFormats {
|
||||
@Test
|
||||
fun `given rar4 archive when analyzing then media status is READY`() {
|
||||
val file = ClassPathResource("archives/rar4.rar")
|
||||
val book = Book("book", file.url, LocalDateTime.now())
|
||||
|
||||
val media = bookAnalyzer.analyze(book, false)
|
||||
val media = bookAnalyzer.analyze(book, false)
|
||||
|
||||
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
|
||||
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
|
||||
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
|
||||
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||
assertThat(media.pages).hasSize(3)
|
||||
}
|
||||
|
||||
// Each listed RAR4 fixture is expected to be detected as RAR version 4 and reported as UNSUPPORTED.
@ParameterizedTest
@ValueSource(
  strings = [
    "rar4-solid.rar", "rar4-encrypted.rar",
  ],
)
fun `given rar4 solid or encrypted archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
  // Resolve the fixture from the parameter so that every value of the @ValueSource is actually analyzed
  // (a hard-coded path would run the solid archive twice and never touch the encrypted one).
  val file = ClassPathResource("archives/$fileName")
  val book = Book("book", file.url, LocalDateTime.now())

  val media = bookAnalyzer.analyze(book, false)

  assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
  assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
}
|
||||
|
||||
// Every listed RAR5 fixture must be detected as RAR version 5 and reported as UNSUPPORTED.
@ParameterizedTest
@ValueSource(strings = ["rar5.rar", "rar5-solid.rar", "rar5-encrypted.rar"])
fun `given rar5 archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
  val archive = ClassPathResource("archives/$fileName")

  val analyzed = bookAnalyzer.analyze(Book("book", archive.url, LocalDateTime.now()), false)

  assertThat(analyzed.mediaType).isEqualTo("application/x-rar-compressed; version=5")
  assertThat(analyzed.status).isEqualTo(Media.Status.UNSUPPORTED)
}
|
||||
|
||||
// Both 7z fixtures (plain and encrypted) must be detected as 7z and reported as UNSUPPORTED.
@ParameterizedTest
@ValueSource(strings = ["7zip.7z", "7zip-encrypted.7z"])
fun `given 7zip archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
  val archive = ClassPathResource("archives/$fileName")

  val analyzed = bookAnalyzer.analyze(Book("book", archive.url, LocalDateTime.now()), false)

  assertThat(analyzed.mediaType).isEqualTo("application/x-7z-compressed")
  assertThat(analyzed.status).isEqualTo(Media.Status.UNSUPPORTED)
}
|
||||
|
||||
// Each listed zip fixture must be analyzed successfully and expose exactly one page.
@ParameterizedTest
@ValueSource(
  strings = ["zip.zip", "zip-bzip2.zip", "zip-copy.zip", "zip-deflate64.zip", "zip-lzma.zip", "zip-ppmd.zip"],
)
fun `given zip archive when analyzing then media status is READY`(fileName: String) {
  val archive = ClassPathResource("archives/$fileName")

  val analyzed = bookAnalyzer.analyze(Book("book", archive.url, LocalDateTime.now()), false)

  assertThat(analyzed.mediaType).isEqualTo("application/zip")
  assertThat(analyzed.status).isEqualTo(Media.Status.READY)
  assertThat(analyzed.pages).hasSize(1)
}
|
||||
|
||||
// An encrypted zip is still detected as a zip, but its analysis must end in the ERROR status.
@Test
fun `given zip encrypted archive when analyzing then media status is ERROR`() {
  val archive = ClassPathResource("archives/zip-encrypted.zip")

  val analyzed = bookAnalyzer.analyze(Book("book", archive.url, LocalDateTime.now()), false)

  assertThat(analyzed.mediaType).isEqualTo("application/zip")
  assertThat(analyzed.status).isEqualTo(Media.Status.ERROR)
}
|
||||
|
||||
// The EPUB3 fixture must be analyzed successfully; it is expected to expose no pages.
@Test
fun `given epub archive when analyzing then media status is READY`() {
  val archive = ClassPathResource("archives/epub3.epub")

  val analyzed = bookAnalyzer.analyze(Book("book", archive.url, LocalDateTime.now()), false)

  assertThat(analyzed.mediaType).isEqualTo("application/epub+zip")
  assertThat(analyzed.status).isEqualTo(Media.Status.READY)
  assertThat(analyzed.pages).hasSize(0)
}
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(
|
||||
strings = [
|
||||
"rar5.rar", "rar5-solid.rar", "rar5-encrypted.rar",
|
||||
],
|
||||
)
|
||||
fun `given rar5 archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
|
||||
val file = ClassPathResource("archives/$fileName")
|
||||
val book = Book("book", file.url, LocalDateTime.now())
|
||||
@Nested
|
||||
inner class Epub {
|
||||
@Test
|
||||
fun `given broken epub archive when analyzing then media status is ERROR`() {
|
||||
val file = ClassPathResource("archives/zip-as-epub.epub")
|
||||
val book = Book("book", file.url, LocalDateTime.now())
|
||||
|
||||
val media = bookAnalyzer.analyze(book, false)
|
||||
val media = bookAnalyzer.analyze(book, false)
|
||||
|
||||
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=5")
|
||||
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
|
||||
assertThat(media.mediaType).isEqualTo("application/zip")
|
||||
assertThat(media.status).isEqualTo(Media.Status.ERROR)
|
||||
assertThat(media.pages).hasSize(0)
|
||||
}
|
||||
|
||||
// A failure while reading the table of contents must not fail the analysis:
// the media stays READY, the comment carries ERR_1035 and the TOC is empty.
@Test
fun `given epub archive when toc cannot be extracted then media status is READY with comments`() {
  val epubFile = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
  val epubBook = Book("book", epubFile.url, LocalDateTime.now())

  // make the spied extractor fail on the TOC only
  every { epubExtractor.getToc(any()) } throws Exception("mock exception")

  val analyzed = bookAnalyzer.analyze(epubBook, false)
  val epubExtension = analyzed.extension as? MediaExtensionEpub

  assertThat(analyzed.mediaType).isEqualTo("application/epub+zip")
  assertThat(analyzed.status).isEqualTo(Media.Status.READY)
  assertThat(analyzed.comment).contains("ERR_1035")
  assertThat(epubExtension).isNotNull
  assertThat(epubExtension?.toc).isEmpty()
}
|
||||
|
||||
// A failure while reading the landmarks must not fail the analysis:
// the media stays READY, the comment carries ERR_1036 and the landmarks are empty.
@Test
fun `given epub archive when landmarks cannot be extracted then media status is READY with comments`() {
  val epubFile = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
  val epubBook = Book("book", epubFile.url, LocalDateTime.now())

  // make the spied extractor fail on the landmarks only
  every { epubExtractor.getLandmarks(any()) } throws Exception("mock exception")

  val analyzed = bookAnalyzer.analyze(epubBook, false)
  val epubExtension = analyzed.extension as? MediaExtensionEpub

  assertThat(analyzed.mediaType).isEqualTo("application/epub+zip")
  assertThat(analyzed.status).isEqualTo(Media.Status.READY)
  assertThat(analyzed.comment).contains("ERR_1036")
  assertThat(epubExtension).isNotNull
  assertThat(epubExtension?.landmarks).isEmpty()
}
|
||||
|
||||
// A failure while reading the page list must not fail the analysis:
// the media stays READY, the comment carries ERR_1037 and the page list is empty.
@Test
fun `given epub archive when page list cannot be extracted then media status is READY with comments`() {
  val epubFile = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
  val epubBook = Book("book", epubFile.url, LocalDateTime.now())

  // make the spied extractor fail on the page list only
  every { epubExtractor.getPageList(any()) } throws Exception("mock exception")

  val analyzed = bookAnalyzer.analyze(epubBook, false)
  val epubExtension = analyzed.extension as? MediaExtensionEpub

  assertThat(analyzed.mediaType).isEqualTo("application/epub+zip")
  assertThat(analyzed.status).isEqualTo(Media.Status.READY)
  assertThat(analyzed.comment).contains("ERR_1037")
  assertThat(epubExtension).isNotNull
  assertThat(epubExtension?.pageList).isEmpty()
}
|
||||
|
||||
// A failure while extracting the divina pages must not fail the analysis:
// the media stays READY, the comment carries ERR_1038 and no pages are reported.
@Test
fun `given epub archive when divina pages cannot be extracted then media status is READY with comments`() {
  val epubFile = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
  val epubBook = Book("book", epubFile.url, LocalDateTime.now())

  // make the spied extractor fail on the divina pages only
  every { epubExtractor.getDivinaPages(any(), any(), any(), any()) } throws Exception("mock exception")

  val analyzed = bookAnalyzer.analyze(epubBook, false)

  assertThat(analyzed.mediaType).isEqualTo("application/epub+zip")
  assertThat(analyzed.status).isEqualTo(Media.Status.READY)
  assertThat(analyzed.comment).contains("ERR_1038")
  assertThat(analyzed.pages).isEmpty()
}
|
||||
|
||||
// A failure while computing the positions must not fail the analysis:
// the media stays READY, the comment carries ERR_1039 and the positions are empty.
@Test
fun `given epub archive when positions cannot be extracted then media status is READY with comments`() {
  val epubFile = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
  val epubBook = Book("book", epubFile.url, LocalDateTime.now())

  // make the spied extractor fail on the positions only
  every { epubExtractor.computePositions(any(), any(), any(), any(), any()) } throws Exception("mock exception")

  val analyzed = bookAnalyzer.analyze(epubBook, false)
  val epubExtension = analyzed.extension as? MediaExtensionEpub

  assertThat(analyzed.mediaType).isEqualTo("application/epub+zip")
  assertThat(analyzed.status).isEqualTo(Media.Status.READY)
  assertThat(analyzed.comment).contains("ERR_1039")
  assertThat(epubExtension).isNotNull
  assertThat(epubExtension?.positions).isEmpty()
}
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(
|
||||
strings = [
|
||||
"7zip.7z", "7zip-encrypted.7z",
|
||||
],
|
||||
)
|
||||
fun `given 7zip archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
|
||||
val file = ClassPathResource("archives/$fileName")
|
||||
val book = Book("book", file.url, LocalDateTime.now())
|
||||
@Nested
|
||||
inner class PageHashing {
|
||||
@Test
|
||||
fun `given book with a single page when hashing then all pages are hashed`() {
|
||||
val book = makeBook("book1")
|
||||
val pages = listOf(BookPage("1.jpeg", "image/jpeg"))
|
||||
val media = Media(Media.Status.READY, pages = pages)
|
||||
|
||||
val media = bookAnalyzer.analyze(book, false)
|
||||
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
|
||||
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
|
||||
|
||||
assertThat(media.mediaType).isEqualTo("application/x-7z-compressed")
|
||||
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
|
||||
}
|
||||
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
||||
|
||||
@ParameterizedTest
|
||||
@ValueSource(
|
||||
strings = [
|
||||
"zip.zip", "zip-bzip2.zip", "zip-copy.zip", "zip-deflate64.zip", "zip-lzma.zip", "zip-ppmd.zip",
|
||||
],
|
||||
)
|
||||
fun `given zip archive when analyzing then media status is READY`(fileName: String) {
|
||||
val file = ClassPathResource("archives/$fileName")
|
||||
val book = Book("book", file.url, LocalDateTime.now())
|
||||
assertThat(hashedMedia.pages).hasSize(1)
|
||||
assertThat(hashedMedia.pages.first().fileHash).isEqualTo("hashed")
|
||||
}
|
||||
|
||||
val media = bookAnalyzer.analyze(book, false)
|
||||
@Test
|
||||
fun `given book with more than 6 pages when hashing then only first and last 3 are hashed`() {
|
||||
val book = makeBook("book1")
|
||||
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg") }
|
||||
val media = Media(Media.Status.READY, pages = pages)
|
||||
|
||||
assertThat(media.mediaType).isEqualTo("application/zip")
|
||||
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||
assertThat(media.pages).hasSize(1)
|
||||
}
|
||||
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
|
||||
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
|
||||
|
||||
@Test
|
||||
fun `given zip encrypted archive when analyzing then media status is ERROR`() {
|
||||
val file = ClassPathResource("archives/zip-encrypted.zip")
|
||||
val book = Book("book", file.url, LocalDateTime.now())
|
||||
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
||||
|
||||
val media = bookAnalyzer.analyze(book, false)
|
||||
assertThat(hashedMedia.pages).hasSize(30)
|
||||
assertThat(hashedMedia.pages.take(komgaProperties.pageHashing).map { it.fileHash })
|
||||
.hasSize(komgaProperties.pageHashing)
|
||||
.containsOnly("hashed")
|
||||
assertThat(hashedMedia.pages.takeLast(komgaProperties.pageHashing).map { it.fileHash })
|
||||
.hasSize(komgaProperties.pageHashing)
|
||||
.containsOnly("hashed")
|
||||
assertThat(
|
||||
hashedMedia.pages
|
||||
.drop(komgaProperties.pageHashing)
|
||||
.dropLast(komgaProperties.pageHashing)
|
||||
.map { it.fileHash },
|
||||
).hasSize(30 - (komgaProperties.pageHashing * 2))
|
||||
.containsOnly("")
|
||||
}
|
||||
|
||||
assertThat(media.mediaType).isEqualTo("application/zip")
|
||||
assertThat(media.status).isEqualTo(Media.Status.ERROR)
|
||||
}
|
||||
@Test
|
||||
fun `given book with already hashed pages when hashing then no hashing is done`() {
|
||||
val book = makeBook("book1")
|
||||
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg", fileHash = "hashed") }
|
||||
val media = Media(Media.Status.READY, pages = pages)
|
||||
|
||||
@Test
|
||||
fun `given epub archive when analyzing then media status is READY`() {
|
||||
val file = ClassPathResource("archives/epub3.epub")
|
||||
val book = Book("book", file.url, LocalDateTime.now())
|
||||
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
||||
|
||||
val media = bookAnalyzer.analyze(book, false)
|
||||
verify(exactly = 0) { bookAnalyzer.getPageContent(any(), any()) }
|
||||
verify(exactly = 0) { bookAnalyzer.hashPage(any(), any()) }
|
||||
|
||||
assertThat(media.mediaType).isEqualTo("application/epub+zip")
|
||||
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||
assertThat(media.pages).hasSize(0)
|
||||
}
|
||||
assertThat(hashedMedia.pages.map { it.fileHash })
|
||||
.hasSize(30)
|
||||
.containsOnly("hashed")
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `given broken epub archive when analyzing then media status is ERROR`() {
|
||||
val file = ClassPathResource("archives/zip-as-epub.epub")
|
||||
val book = Book("book", file.url, LocalDateTime.now())
|
||||
@ParameterizedTest
|
||||
@MethodSource("provideDirectoriesForPageHashing")
|
||||
fun `given 2 exact pages when hashing then hashes are the same`(directory: Path) {
|
||||
val files = directory.listDirectoryEntries()
|
||||
assertThat(files).hasSize(2)
|
||||
|
||||
val media = bookAnalyzer.analyze(book, false)
|
||||
val mediaType = "image/${directory.fileName.extension}"
|
||||
|
||||
assertThat(media.mediaType).isEqualTo("application/zip")
|
||||
assertThat(media.status).isEqualTo(Media.Status.ERROR)
|
||||
assertThat(media.pages).hasSize(0)
|
||||
}
|
||||
val hashes =
|
||||
files.map {
|
||||
bookAnalyzer.hashPage(BookPage(it.name, mediaType = mediaType), it.inputStream().readBytes())
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `given book with a single page when hashing then all pages are hashed`() {
|
||||
val book = makeBook("book1")
|
||||
val pages = listOf(BookPage("1.jpeg", "image/jpeg"))
|
||||
val media = Media(Media.Status.READY, pages = pages)
|
||||
assertThat(hashes.first()).isEqualTo(hashes.last())
|
||||
}
|
||||
|
||||
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
|
||||
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
|
||||
|
||||
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
||||
|
||||
assertThat(hashedMedia.pages).hasSize(1)
|
||||
assertThat(hashedMedia.pages.first().fileHash).isEqualTo("hashed")
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `given book with more than 6 pages when hashing then only first and last 3 are hashed`() {
|
||||
val book = makeBook("book1")
|
||||
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg") }
|
||||
val media = Media(Media.Status.READY, pages = pages)
|
||||
|
||||
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
|
||||
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
|
||||
|
||||
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
||||
|
||||
assertThat(hashedMedia.pages).hasSize(30)
|
||||
assertThat(hashedMedia.pages.take(komgaProperties.pageHashing).map { it.fileHash })
|
||||
.hasSize(komgaProperties.pageHashing)
|
||||
.containsOnly("hashed")
|
||||
assertThat(hashedMedia.pages.takeLast(komgaProperties.pageHashing).map { it.fileHash })
|
||||
.hasSize(komgaProperties.pageHashing)
|
||||
.containsOnly("hashed")
|
||||
assertThat(
|
||||
hashedMedia.pages
|
||||
.drop(komgaProperties.pageHashing)
|
||||
.dropLast(komgaProperties.pageHashing)
|
||||
.map { it.fileHash },
|
||||
).hasSize(30 - (komgaProperties.pageHashing * 2))
|
||||
.containsOnly("")
|
||||
}
|
||||
|
||||
@Test
|
||||
fun `given book with already hashed pages when hashing then no hashing is done`() {
|
||||
val book = makeBook("book1")
|
||||
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg", fileHash = "hashed") }
|
||||
val media = Media(Media.Status.READY, pages = pages)
|
||||
|
||||
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
||||
|
||||
verify(exactly = 0) { bookAnalyzer.getPageContent(any(), any()) }
|
||||
verify(exactly = 0) { bookAnalyzer.hashPage(any(), any()) }
|
||||
|
||||
assertThat(hashedMedia.pages.map { it.fileHash })
|
||||
.hasSize(30)
|
||||
.containsOnly("hashed")
|
||||
}
|
||||
|
||||
@ParameterizedTest
|
||||
@MethodSource("provideDirectoriesForPageHashing")
|
||||
fun `given 2 exact pages when hashing then hashes are the same`(directory: Path) {
|
||||
val files = directory.listDirectoryEntries()
|
||||
assertThat(files).hasSize(2)
|
||||
|
||||
val mediaType = "image/${directory.fileName.extension}"
|
||||
|
||||
val hashes =
|
||||
files.map {
|
||||
bookAnalyzer.hashPage(BookPage(it.name, mediaType = mediaType), it.inputStream().readBytes())
|
||||
}
|
||||
|
||||
assertThat(hashes.first()).isEqualTo(hashes.last())
|
||||
}
|
||||
|
||||
companion object {
|
||||
@JvmStatic
|
||||
fun provideDirectoriesForPageHashing() = ClassPathResource("hashpage").uri.toPath().listDirectoryEntries()
|
||||
private fun provideDirectoriesForPageHashing() = ClassPathResource("hashpage").uri.toPath().listDirectoryEntries()
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user