mirror of
https://github.com/gotson/komga.git
synced 2025-04-04 22:33:31 +08:00
fix: don't fail epub analysis when optional features are missing
Refs: #1909
This commit is contained in:
parent
1250a97d99
commit
465467c50c
@ -38,3 +38,8 @@
|
|||||||
| ERR_1032 | EPUB file has wrong media type |
|
| ERR_1032 | EPUB file has wrong media type |
|
||||||
| ERR_1033 | Some entries are missing |
|
| ERR_1033 | Some entries are missing |
|
||||||
| ERR_1034 | An API key with that comment already exists |
|
| ERR_1034 | An API key with that comment already exists |
|
||||||
|
| ERR_1035 | Error while getting EPUB TOC |
|
||||||
|
| ERR_1036 | Error while getting EPUB Landmarks |
|
||||||
|
| ERR_1037 | Error while getting EPUB page list |
|
||||||
|
| ERR_1038 | Error while getting EPUB divina pages |
|
||||||
|
| ERR_1039 | Error while getting EPUB positions |
|
||||||
|
@ -827,7 +827,12 @@
|
|||||||
"ERR_1031": "ComicRack CBL Book is missing series or number",
|
"ERR_1031": "ComicRack CBL Book is missing series or number",
|
||||||
"ERR_1032": "EPUB file has wrong media type",
|
"ERR_1032": "EPUB file has wrong media type",
|
||||||
"ERR_1033": "Some entries are missing",
|
"ERR_1033": "Some entries are missing",
|
||||||
"ERR_1034": "An API key with that comment already exists"
|
"ERR_1034": "An API key with that comment already exists",
|
||||||
|
"ERR_1035": "Error while getting EPUB TOC",
|
||||||
|
"ERR_1036": "Error while getting EPUB Landmarks",
|
||||||
|
"ERR_1037": "Error while getting EPUB page list",
|
||||||
|
"ERR_1038": "Error while getting EPUB divina pages",
|
||||||
|
"ERR_1039": "Error while getting EPUB positions"
|
||||||
},
|
},
|
||||||
"filter": {
|
"filter": {
|
||||||
"age_rating": "age rating",
|
"age_rating": "age rating",
|
||||||
|
@ -23,6 +23,7 @@ import org.gotson.komga.infrastructure.image.ImageType
|
|||||||
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
|
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
|
||||||
import org.gotson.komga.infrastructure.mediacontainer.divina.DivinaExtractor
|
import org.gotson.komga.infrastructure.mediacontainer.divina.DivinaExtractor
|
||||||
import org.gotson.komga.infrastructure.mediacontainer.epub.EpubExtractor
|
import org.gotson.komga.infrastructure.mediacontainer.epub.EpubExtractor
|
||||||
|
import org.gotson.komga.infrastructure.mediacontainer.epub.epub
|
||||||
import org.gotson.komga.infrastructure.mediacontainer.pdf.PdfExtractor
|
import org.gotson.komga.infrastructure.mediacontainer.pdf.PdfExtractor
|
||||||
import org.springframework.beans.factory.annotation.Qualifier
|
import org.springframework.beans.factory.annotation.Qualifier
|
||||||
import org.springframework.beans.factory.annotation.Value
|
import org.springframework.beans.factory.annotation.Value
|
||||||
@ -143,29 +144,84 @@ class BookAnalyzer(
|
|||||||
book: Book,
|
book: Book,
|
||||||
analyzeDimensions: Boolean,
|
analyzeDimensions: Boolean,
|
||||||
): Media {
|
): Media {
|
||||||
val manifest = epubExtractor.getManifest(book.path, analyzeDimensions)
|
book.path.epub { epub ->
|
||||||
val entriesErrorSummary =
|
val (resources, missingResources) = epubExtractor.getResources(epub).partition { it.fileSize != null }
|
||||||
manifest.missingResources
|
val isFixedLayout = epubExtractor.isFixedLayout(epub)
|
||||||
.map { it.fileName }
|
val pageCount = epubExtractor.computePageCount(epub)
|
||||||
.ifEmpty { null }
|
val isKepub = epubExtractor.isKepub(epub, resources)
|
||||||
?.joinToString(prefix = "ERR_1033 [", postfix = "]") { it }
|
|
||||||
return Media(
|
val errors = mutableListOf<String>()
|
||||||
status = Media.Status.READY,
|
|
||||||
pages = manifest.divinaPages,
|
val toc =
|
||||||
files = manifest.resources,
|
try {
|
||||||
pageCount = manifest.pageCount,
|
epubExtractor.getToc(epub)
|
||||||
epubDivinaCompatible = manifest.divinaPages.isNotEmpty(),
|
} catch (e: Exception) {
|
||||||
epubIsKepub = manifest.isKepub,
|
logger.error(e) { "Error while getting EPUB TOC" }
|
||||||
extension =
|
errors.add("ERR_1035")
|
||||||
MediaExtensionEpub(
|
emptyList()
|
||||||
toc = manifest.toc,
|
}
|
||||||
landmarks = manifest.landmarks,
|
|
||||||
pageList = manifest.pageList,
|
val landmarks =
|
||||||
isFixedLayout = manifest.isFixedLayout,
|
try {
|
||||||
positions = manifest.positions,
|
epubExtractor.getLandmarks(epub)
|
||||||
),
|
} catch (e: Exception) {
|
||||||
comment = entriesErrorSummary,
|
logger.error(e) { "Error while getting EPUB Landmarks" }
|
||||||
)
|
errors.add("ERR_1036")
|
||||||
|
emptyList()
|
||||||
|
}
|
||||||
|
|
||||||
|
val pageList =
|
||||||
|
try {
|
||||||
|
epubExtractor.getPageList(epub)
|
||||||
|
} catch (e: Exception) {
|
||||||
|
logger.error(e) { "Error while getting EPUB page list" }
|
||||||
|
errors.add("ERR_1037")
|
||||||
|
emptyList()
|
||||||
|
}
|
||||||
|
|
||||||
|
val divinaPages =
|
||||||
|
try {
|
||||||
|
epubExtractor.getDivinaPages(epub, isFixedLayout, pageCount, analyzeDimensions)
|
||||||
|
} catch (e: Exception) {
|
||||||
|
logger.error(e) { "Error while getting EPUB Divina pages" }
|
||||||
|
errors.add("ERR_1038")
|
||||||
|
emptyList()
|
||||||
|
}
|
||||||
|
|
||||||
|
val positions =
|
||||||
|
try {
|
||||||
|
epubExtractor.computePositions(epub, book.path, resources, isFixedLayout, isKepub)
|
||||||
|
} catch (e: Exception) {
|
||||||
|
logger.error(e) { "Error while getting EPUB positions" }
|
||||||
|
errors.add("ERR_1039")
|
||||||
|
emptyList()
|
||||||
|
}
|
||||||
|
|
||||||
|
val entriesErrorSummary =
|
||||||
|
missingResources
|
||||||
|
.map { it.fileName }
|
||||||
|
.ifEmpty { null }
|
||||||
|
?.joinToString(prefix = "ERR_1033 [", postfix = "]") { it }
|
||||||
|
// entriesErrorSummary is null when no resources are missing; drop it rather than letting joinToString render the literal "null".
// A blank result means there is nothing to report, so allErrors (used as the media comment) is null, as it was before per-feature errors were collected.
val allErrors = (errors + listOfNotNull(entriesErrorSummary)).joinToString(" ").ifBlank { null }
|
||||||
|
|
||||||
|
return Media(
|
||||||
|
status = Media.Status.READY,
|
||||||
|
pages = divinaPages,
|
||||||
|
files = resources,
|
||||||
|
pageCount = pageCount,
|
||||||
|
epubDivinaCompatible = divinaPages.isNotEmpty(),
|
||||||
|
epubIsKepub = isKepub,
|
||||||
|
extension =
|
||||||
|
MediaExtensionEpub(
|
||||||
|
toc = toc,
|
||||||
|
landmarks = landmarks,
|
||||||
|
pageList = pageList,
|
||||||
|
isFixedLayout = isFixedLayout,
|
||||||
|
positions = positions,
|
||||||
|
),
|
||||||
|
comment = allErrors,
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun analyzePdf(
|
private fun analyzePdf(
|
||||||
|
@ -81,30 +81,7 @@ class EpubExtractor(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fun getManifest(
|
fun getResources(epub: EpubPackage): List<MediaFile> {
|
||||||
path: Path,
|
|
||||||
analyzeDimensions: Boolean,
|
|
||||||
): EpubManifest =
|
|
||||||
path.epub { epub ->
|
|
||||||
val (resources, missingResources) = getResources(epub).partition { it.fileSize != null }
|
|
||||||
val isFixedLayout = isFixedLayout(epub)
|
|
||||||
val pageCount = computePageCount(epub)
|
|
||||||
val isKepub = isKepub(epub, resources)
|
|
||||||
EpubManifest(
|
|
||||||
resources = resources,
|
|
||||||
missingResources = missingResources,
|
|
||||||
toc = getToc(epub),
|
|
||||||
landmarks = getLandmarks(epub),
|
|
||||||
pageList = getPageList(epub),
|
|
||||||
pageCount = pageCount,
|
|
||||||
isFixedLayout = isFixedLayout,
|
|
||||||
positions = computePositions(epub, path, resources, isFixedLayout, isKepub),
|
|
||||||
divinaPages = getDivinaPages(epub, isFixedLayout, pageCount, analyzeDimensions),
|
|
||||||
isKepub = isKepub,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
private fun getResources(epub: EpubPackage): List<MediaFile> {
|
|
||||||
val spine =
|
val spine =
|
||||||
epub.opfDoc
|
epub.opfDoc
|
||||||
.select("spine > itemref")
|
.select("spine > itemref")
|
||||||
@ -135,7 +112,7 @@ class EpubExtractor(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun getDivinaPages(
|
fun getDivinaPages(
|
||||||
epub: EpubPackage,
|
epub: EpubPackage,
|
||||||
isFixedLayout: Boolean,
|
isFixedLayout: Boolean,
|
||||||
pageCount: Int,
|
pageCount: Int,
|
||||||
@ -146,72 +123,67 @@ class EpubExtractor(
|
|||||||
return emptyList()
|
return emptyList()
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
val pagesWithImages =
|
||||||
val pagesWithImages =
|
epub.opfDoc
|
||||||
epub.opfDoc
|
.select("spine > itemref")
|
||||||
.select("spine > itemref")
|
.map { it.attr("idref") }
|
||||||
.map { it.attr("idref") }
|
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
|
||||||
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
|
.map { pagePath ->
|
||||||
.map { pagePath ->
|
val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
|
||||||
val doc = epub.zip.getEntryInputStream(pagePath)?.use { Jsoup.parse(it, null, "") } ?: return@map emptyList()
|
|
||||||
|
|
||||||
// if a page has text over the threshold then the book is not divina compatible
|
// if a page has text over the threshold then the book is not divina compatible
|
||||||
if (doc.body().text().length > letterCountThreshold) return emptyList()
|
if (doc.body().text().length > letterCountThreshold) return emptyList()
|
||||||
|
|
||||||
val img =
|
val img =
|
||||||
doc
|
doc
|
||||||
.getElementsByTag("img")
|
.getElementsByTag("img")
|
||||||
.map { it.attr("src") } // get the src, which can be a relative path
|
.map { it.attr("src") } // get the src, which can be a relative path
|
||||||
|
|
||||||
val svg =
|
val svg =
|
||||||
doc
|
doc
|
||||||
.select("svg > image[xlink:href]")
|
.select("svg > image[xlink:href]")
|
||||||
.map { it.attr("xlink:href") } // get the source, which can be a relative path
|
.map { it.attr("xlink:href") } // get the source, which can be a relative path
|
||||||
|
|
||||||
(img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
|
(img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
|
||||||
}
|
|
||||||
|
|
||||||
if (pagesWithImages.size != pageCount) {
|
|
||||||
logger.info { "Epub Divina detection failed: book has ${pagesWithImages.size} pages with images, but $pageCount total pages" }
|
|
||||||
return emptyList()
|
|
||||||
}
|
|
||||||
// Only keep unique image path for each page. KCC sometimes generates HTML pages with 5 times the same image.
|
|
||||||
val imagesPath = pagesWithImages.map { it.distinct() }.flatten()
|
|
||||||
if (imagesPath.size != pageCount) {
|
|
||||||
logger.info { "Epub Divina detection failed: book has ${imagesPath.size} detected images, but $pageCount total pages" }
|
|
||||||
return emptyList()
|
|
||||||
}
|
|
||||||
|
|
||||||
val divinaPages =
|
|
||||||
imagesPath.mapNotNull { imagePath ->
|
|
||||||
val mediaType =
|
|
||||||
epub.manifest.values
|
|
||||||
.firstOrNull { normalizeHref(epub.opfDir, it.href) == imagePath }
|
|
||||||
?.mediaType ?: return@mapNotNull null
|
|
||||||
val zipEntry = epub.zip.getEntry(imagePath)
|
|
||||||
if (!contentDetector.isImage(mediaType)) return@mapNotNull null
|
|
||||||
|
|
||||||
val dimension =
|
|
||||||
if (analyzeDimensions)
|
|
||||||
epub.zip.getInputStream(zipEntry).use { imageAnalyzer.getDimension(it) }
|
|
||||||
else
|
|
||||||
null
|
|
||||||
val fileSize = if (zipEntry.size == ArchiveEntry.SIZE_UNKNOWN) null else zipEntry.size
|
|
||||||
BookPage(fileName = imagePath, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (divinaPages.size != pageCount) {
|
if (pagesWithImages.size != pageCount) {
|
||||||
logger.info { "Epub Divina detection failed: book has ${divinaPages.size} detected divina pages, but $pageCount total pages" }
|
logger.info { "Epub Divina detection failed: book has ${pagesWithImages.size} pages with images, but $pageCount total pages" }
|
||||||
return emptyList()
|
|
||||||
}
|
|
||||||
return divinaPages
|
|
||||||
} catch (e: Exception) {
|
|
||||||
logger.warn(e) { "Error while getting divina pages" }
|
|
||||||
return emptyList()
|
return emptyList()
|
||||||
}
|
}
|
||||||
|
// Only keep unique image path for each page. KCC sometimes generates HTML pages with 5 times the same image.
|
||||||
|
val imagesPath = pagesWithImages.map { it.distinct() }.flatten()
|
||||||
|
if (imagesPath.size != pageCount) {
|
||||||
|
logger.info { "Epub Divina detection failed: book has ${imagesPath.size} detected images, but $pageCount total pages" }
|
||||||
|
return emptyList()
|
||||||
|
}
|
||||||
|
|
||||||
|
val divinaPages =
|
||||||
|
imagesPath.mapNotNull { imagePath ->
|
||||||
|
val mediaType =
|
||||||
|
epub.manifest.values
|
||||||
|
.firstOrNull { normalizeHref(epub.opfDir, it.href) == imagePath }
|
||||||
|
?.mediaType ?: return@mapNotNull null
|
||||||
|
val zipEntry = epub.zip.getEntry(imagePath)
|
||||||
|
if (!contentDetector.isImage(mediaType)) return@mapNotNull null
|
||||||
|
|
||||||
|
val dimension =
|
||||||
|
if (analyzeDimensions)
|
||||||
|
epub.zip.getInputStream(zipEntry).use { imageAnalyzer.getDimension(it) }
|
||||||
|
else
|
||||||
|
null
|
||||||
|
val fileSize = if (zipEntry.size == ArchiveEntry.SIZE_UNKNOWN) null else zipEntry.size
|
||||||
|
BookPage(fileName = imagePath, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (divinaPages.size != pageCount) {
|
||||||
|
logger.info { "Epub Divina detection failed: book has ${divinaPages.size} detected divina pages, but $pageCount total pages" }
|
||||||
|
return emptyList()
|
||||||
|
}
|
||||||
|
return divinaPages
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun isKepub(
|
fun isKepub(
|
||||||
epub: EpubPackage,
|
epub: EpubPackage,
|
||||||
resources: List<MediaFile>,
|
resources: List<MediaFile>,
|
||||||
): Boolean {
|
): Boolean {
|
||||||
@ -228,7 +200,7 @@ class EpubExtractor(
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun computePageCount(epub: EpubPackage): Int {
|
fun computePageCount(epub: EpubPackage): Int {
|
||||||
val spine =
|
val spine =
|
||||||
epub.opfDoc
|
epub.opfDoc
|
||||||
.select("spine > itemref")
|
.select("spine > itemref")
|
||||||
@ -241,11 +213,11 @@ class EpubExtractor(
|
|||||||
.sumOf { ceil(it.compressedSize / 1024.0).toInt() }
|
.sumOf { ceil(it.compressedSize / 1024.0).toInt() }
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun isFixedLayout(epub: EpubPackage) =
|
fun isFixedLayout(epub: EpubPackage) =
|
||||||
epub.opfDoc.selectFirst("metadata > *|meta[property=rendition:layout]")?.text() == "pre-paginated" ||
|
epub.opfDoc.selectFirst("metadata > *|meta[property=rendition:layout]")?.text() == "pre-paginated" ||
|
||||||
epub.opfDoc.selectFirst("metadata > *|meta[name=fixed-layout]")?.attr("content") == "true"
|
epub.opfDoc.selectFirst("metadata > *|meta[name=fixed-layout]")?.attr("content") == "true"
|
||||||
|
|
||||||
private fun computePositions(
|
fun computePositions(
|
||||||
epub: EpubPackage,
|
epub: EpubPackage,
|
||||||
path: Path,
|
path: Path,
|
||||||
resources: List<MediaFile>,
|
resources: List<MediaFile>,
|
||||||
@ -346,7 +318,7 @@ class EpubExtractor(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun getToc(epub: EpubPackage): List<EpubTocEntry> {
|
fun getToc(epub: EpubPackage): List<EpubTocEntry> {
|
||||||
// Epub 3
|
// Epub 3
|
||||||
epub.getNavResource()?.let { return processNav(it, Epub3Nav.TOC) }
|
epub.getNavResource()?.let { return processNav(it, Epub3Nav.TOC) }
|
||||||
// Epub 2
|
// Epub 2
|
||||||
@ -354,7 +326,7 @@ class EpubExtractor(
|
|||||||
return emptyList()
|
return emptyList()
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun getPageList(epub: EpubPackage): List<EpubTocEntry> {
|
fun getPageList(epub: EpubPackage): List<EpubTocEntry> {
|
||||||
// Epub 3
|
// Epub 3
|
||||||
epub.getNavResource()?.let { return processNav(it, Epub3Nav.PAGELIST) }
|
epub.getNavResource()?.let { return processNav(it, Epub3Nav.PAGELIST) }
|
||||||
// Epub 2
|
// Epub 2
|
||||||
@ -362,7 +334,7 @@ class EpubExtractor(
|
|||||||
return emptyList()
|
return emptyList()
|
||||||
}
|
}
|
||||||
|
|
||||||
private fun getLandmarks(epub: EpubPackage): List<EpubTocEntry> {
|
fun getLandmarks(epub: EpubPackage): List<EpubTocEntry> {
|
||||||
// Epub 3
|
// Epub 3
|
||||||
epub.getNavResource()?.let { return processNav(it, Epub3Nav.LANDMARKS) }
|
epub.getNavResource()?.let { return processNav(it, Epub3Nav.LANDMARKS) }
|
||||||
|
|
||||||
|
@ -1,19 +0,0 @@
|
|||||||
package org.gotson.komga.infrastructure.mediacontainer.epub
|
|
||||||
|
|
||||||
import org.gotson.komga.domain.model.BookPage
|
|
||||||
import org.gotson.komga.domain.model.EpubTocEntry
|
|
||||||
import org.gotson.komga.domain.model.MediaFile
|
|
||||||
import org.gotson.komga.domain.model.R2Locator
|
|
||||||
|
|
||||||
data class EpubManifest(
|
|
||||||
val resources: List<MediaFile>,
|
|
||||||
val missingResources: List<MediaFile>,
|
|
||||||
val toc: List<EpubTocEntry>,
|
|
||||||
val landmarks: List<EpubTocEntry>,
|
|
||||||
val pageList: List<EpubTocEntry>,
|
|
||||||
val pageCount: Int,
|
|
||||||
val isFixedLayout: Boolean,
|
|
||||||
val positions: List<R2Locator>,
|
|
||||||
val divinaPages: List<BookPage>,
|
|
||||||
val isKepub: Boolean,
|
|
||||||
)
|
|
@ -1,6 +1,7 @@
|
|||||||
package org.gotson.komga.domain.service
|
package org.gotson.komga.domain.service
|
||||||
|
|
||||||
import com.ninjasquad.springmockk.SpykBean
|
import com.ninjasquad.springmockk.SpykBean
|
||||||
|
import io.mockk.clearAllMocks
|
||||||
import io.mockk.every
|
import io.mockk.every
|
||||||
import io.mockk.verify
|
import io.mockk.verify
|
||||||
import org.assertj.core.api.Assertions.assertThat
|
import org.assertj.core.api.Assertions.assertThat
|
||||||
@ -8,8 +9,12 @@ import org.gotson.komga.domain.model.Book
|
|||||||
import org.gotson.komga.domain.model.BookPage
|
import org.gotson.komga.domain.model.BookPage
|
||||||
import org.gotson.komga.domain.model.BookWithMedia
|
import org.gotson.komga.domain.model.BookWithMedia
|
||||||
import org.gotson.komga.domain.model.Media
|
import org.gotson.komga.domain.model.Media
|
||||||
|
import org.gotson.komga.domain.model.MediaExtensionEpub
|
||||||
import org.gotson.komga.domain.model.makeBook
|
import org.gotson.komga.domain.model.makeBook
|
||||||
import org.gotson.komga.infrastructure.configuration.KomgaProperties
|
import org.gotson.komga.infrastructure.configuration.KomgaProperties
|
||||||
|
import org.gotson.komga.infrastructure.mediacontainer.epub.EpubExtractor
|
||||||
|
import org.junit.jupiter.api.AfterEach
|
||||||
|
import org.junit.jupiter.api.Nested
|
||||||
import org.junit.jupiter.api.Test
|
import org.junit.jupiter.api.Test
|
||||||
import org.junit.jupiter.params.ParameterizedTest
|
import org.junit.jupiter.params.ParameterizedTest
|
||||||
import org.junit.jupiter.params.provider.MethodSource
|
import org.junit.jupiter.params.provider.MethodSource
|
||||||
@ -32,194 +37,291 @@ class BookAnalyzerTest(
|
|||||||
@SpykBean
|
@SpykBean
|
||||||
private lateinit var bookAnalyzer: BookAnalyzer
|
private lateinit var bookAnalyzer: BookAnalyzer
|
||||||
|
|
||||||
@Test
|
@SpykBean
|
||||||
fun `given rar4 archive when analyzing then media status is READY`() {
|
private lateinit var epubExtractor: EpubExtractor
|
||||||
val file = ClassPathResource("archives/rar4.rar")
|
|
||||||
val book = Book("book", file.url, LocalDateTime.now())
|
|
||||||
|
|
||||||
val media = bookAnalyzer.analyze(book, false)
|
@AfterEach
|
||||||
|
fun afterEach() {
|
||||||
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
|
clearAllMocks()
|
||||||
assertThat(media.status).isEqualTo(Media.Status.READY)
|
|
||||||
assertThat(media.pages).hasSize(3)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ParameterizedTest
|
@Nested
|
||||||
@ValueSource(
|
inner class ArchiveFormats {
|
||||||
strings = [
|
@Test
|
||||||
"rar4-solid.rar", "rar4-encrypted.rar",
|
fun `given rar4 archive when analyzing then media status is READY`() {
|
||||||
],
|
val file = ClassPathResource("archives/rar4.rar")
|
||||||
)
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
fun `given rar4 solid or encrypted archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
|
|
||||||
val file = ClassPathResource("archives/rar4-solid.rar")
|
|
||||||
val book = Book("book", file.url, LocalDateTime.now())
|
|
||||||
|
|
||||||
val media = bookAnalyzer.analyze(book, false)
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
|
||||||
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
|
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
|
||||||
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
|
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||||
|
assertThat(media.pages).hasSize(3)
|
||||||
|
}
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@ValueSource(
|
||||||
|
strings = [
|
||||||
|
"rar4-solid.rar", "rar4-encrypted.rar",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
fun `given rar4 solid or encrypted archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
|
||||||
|
// Use the parameterized file name so both the solid and the encrypted rar4 fixtures are actually analyzed.
val file = ClassPathResource("archives/$fileName")
|
||||||
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
|
|
||||||
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
|
||||||
|
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=4")
|
||||||
|
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
|
||||||
|
}
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@ValueSource(
|
||||||
|
strings = [
|
||||||
|
"rar5.rar", "rar5-solid.rar", "rar5-encrypted.rar",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
fun `given rar5 archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
|
||||||
|
val file = ClassPathResource("archives/$fileName")
|
||||||
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
|
|
||||||
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
|
||||||
|
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=5")
|
||||||
|
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
|
||||||
|
}
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@ValueSource(
|
||||||
|
strings = [
|
||||||
|
"7zip.7z", "7zip-encrypted.7z",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
fun `given 7zip archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
|
||||||
|
val file = ClassPathResource("archives/$fileName")
|
||||||
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
|
|
||||||
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
|
||||||
|
assertThat(media.mediaType).isEqualTo("application/x-7z-compressed")
|
||||||
|
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
|
||||||
|
}
|
||||||
|
|
||||||
|
@ParameterizedTest
|
||||||
|
@ValueSource(
|
||||||
|
strings = [
|
||||||
|
"zip.zip", "zip-bzip2.zip", "zip-copy.zip", "zip-deflate64.zip", "zip-lzma.zip", "zip-ppmd.zip",
|
||||||
|
],
|
||||||
|
)
|
||||||
|
fun `given zip archive when analyzing then media status is READY`(fileName: String) {
|
||||||
|
val file = ClassPathResource("archives/$fileName")
|
||||||
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
|
|
||||||
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
|
||||||
|
assertThat(media.mediaType).isEqualTo("application/zip")
|
||||||
|
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||||
|
assertThat(media.pages).hasSize(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `given zip encrypted archive when analyzing then media status is ERROR`() {
|
||||||
|
val file = ClassPathResource("archives/zip-encrypted.zip")
|
||||||
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
|
|
||||||
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
|
||||||
|
assertThat(media.mediaType).isEqualTo("application/zip")
|
||||||
|
assertThat(media.status).isEqualTo(Media.Status.ERROR)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `given epub archive when analyzing then media status is READY`() {
|
||||||
|
val file = ClassPathResource("archives/epub3.epub")
|
||||||
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
|
|
||||||
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
|
||||||
|
assertThat(media.mediaType).isEqualTo("application/epub+zip")
|
||||||
|
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||||
|
assertThat(media.pages).hasSize(0)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ParameterizedTest
|
@Nested
|
||||||
@ValueSource(
|
inner class Epub {
|
||||||
strings = [
|
@Test
|
||||||
"rar5.rar", "rar5-solid.rar", "rar5-encrypted.rar",
|
fun `given broken epub archive when analyzing then media status is ERROR`() {
|
||||||
],
|
val file = ClassPathResource("archives/zip-as-epub.epub")
|
||||||
)
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
fun `given rar5 archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
|
|
||||||
val file = ClassPathResource("archives/$fileName")
|
|
||||||
val book = Book("book", file.url, LocalDateTime.now())
|
|
||||||
|
|
||||||
val media = bookAnalyzer.analyze(book, false)
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
|
||||||
assertThat(media.mediaType).isEqualTo("application/x-rar-compressed; version=5")
|
assertThat(media.mediaType).isEqualTo("application/zip")
|
||||||
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
|
assertThat(media.status).isEqualTo(Media.Status.ERROR)
|
||||||
|
assertThat(media.pages).hasSize(0)
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `given epub archive when toc cannot be extracted then media status is READY with comments`() {
|
||||||
|
val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
|
||||||
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
|
|
||||||
|
every { epubExtractor.getToc(any()) } throws Exception("mock exception")
|
||||||
|
|
||||||
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
val extension = media.extension as? MediaExtensionEpub
|
||||||
|
|
||||||
|
assertThat(media.mediaType).isEqualTo("application/epub+zip")
|
||||||
|
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||||
|
assertThat(media.comment).contains("ERR_1035")
|
||||||
|
assertThat(extension).isNotNull
|
||||||
|
assertThat(extension!!.toc).isEmpty()
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `given epub archive when landmarks cannot be extracted then media status is READY with comments`() {
|
||||||
|
val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
|
||||||
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
|
|
||||||
|
every { epubExtractor.getLandmarks(any()) } throws Exception("mock exception")
|
||||||
|
|
||||||
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
val extension = media.extension as? MediaExtensionEpub
|
||||||
|
|
||||||
|
assertThat(media.mediaType).isEqualTo("application/epub+zip")
|
||||||
|
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||||
|
assertThat(media.comment).contains("ERR_1036")
|
||||||
|
assertThat(extension).isNotNull
|
||||||
|
assertThat(extension!!.landmarks).isEmpty()
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `given epub archive when page list cannot be extracted then media status is READY with comments`() {
|
||||||
|
val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
|
||||||
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
|
|
||||||
|
every { epubExtractor.getPageList(any()) } throws Exception("mock exception")
|
||||||
|
|
||||||
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
val extension = media.extension as? MediaExtensionEpub
|
||||||
|
|
||||||
|
assertThat(media.mediaType).isEqualTo("application/epub+zip")
|
||||||
|
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||||
|
assertThat(media.comment).contains("ERR_1037")
|
||||||
|
assertThat(extension).isNotNull
|
||||||
|
assertThat(extension!!.pageList).isEmpty()
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `given epub archive when divina pages cannot be extracted then media status is READY with comments`() {
|
||||||
|
val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
|
||||||
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
|
|
||||||
|
every { epubExtractor.getDivinaPages(any(), any(), any(), any()) } throws Exception("mock exception")
|
||||||
|
|
||||||
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
|
||||||
|
assertThat(media.mediaType).isEqualTo("application/epub+zip")
|
||||||
|
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||||
|
assertThat(media.comment).contains("ERR_1038")
|
||||||
|
assertThat(media.pages).isEmpty()
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
fun `given epub archive when positions cannot be extracted then media status is READY with comments`() {
|
||||||
|
val file = ClassPathResource("epub/The Incomplete Theft - Ralph Burke.epub")
|
||||||
|
val book = Book("book", file.url, LocalDateTime.now())
|
||||||
|
|
||||||
|
every { epubExtractor.computePositions(any(), any(), any(), any(), any()) } throws Exception("mock exception")
|
||||||
|
|
||||||
|
val media = bookAnalyzer.analyze(book, false)
|
||||||
|
val extension = media.extension as? MediaExtensionEpub
|
||||||
|
|
||||||
|
assertThat(media.mediaType).isEqualTo("application/epub+zip")
|
||||||
|
assertThat(media.status).isEqualTo(Media.Status.READY)
|
||||||
|
assertThat(media.comment).contains("ERR_1039")
|
||||||
|
assertThat(extension).isNotNull
|
||||||
|
assertThat(extension!!.positions).isEmpty()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ParameterizedTest
|
@Nested
|
||||||
@ValueSource(
|
inner class PageHashing {
|
||||||
strings = [
|
@Test
|
||||||
"7zip.7z", "7zip-encrypted.7z",
|
fun `given book with a single page when hashing then all pages are hashed`() {
|
||||||
],
|
val book = makeBook("book1")
|
||||||
)
|
val pages = listOf(BookPage("1.jpeg", "image/jpeg"))
|
||||||
fun `given 7zip archive when analyzing then media status is UNSUPPORTED`(fileName: String) {
|
val media = Media(Media.Status.READY, pages = pages)
|
||||||
val file = ClassPathResource("archives/$fileName")
|
|
||||||
val book = Book("book", file.url, LocalDateTime.now())
|
|
||||||
|
|
||||||
val media = bookAnalyzer.analyze(book, false)
|
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
|
||||||
|
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
|
||||||
|
|
||||||
assertThat(media.mediaType).isEqualTo("application/x-7z-compressed")
|
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
||||||
assertThat(media.status).isEqualTo(Media.Status.UNSUPPORTED)
|
|
||||||
}
|
|
||||||
|
|
||||||
@ParameterizedTest
|
assertThat(hashedMedia.pages).hasSize(1)
|
||||||
@ValueSource(
|
assertThat(hashedMedia.pages.first().fileHash).isEqualTo("hashed")
|
||||||
strings = [
|
}
|
||||||
"zip.zip", "zip-bzip2.zip", "zip-copy.zip", "zip-deflate64.zip", "zip-lzma.zip", "zip-ppmd.zip",
|
|
||||||
],
|
|
||||||
)
|
|
||||||
fun `given zip archive when analyzing then media status is READY`(fileName: String) {
|
|
||||||
val file = ClassPathResource("archives/$fileName")
|
|
||||||
val book = Book("book", file.url, LocalDateTime.now())
|
|
||||||
|
|
||||||
val media = bookAnalyzer.analyze(book, false)
|
@Test
|
||||||
|
fun `given book with more than 6 pages when hashing then only first and last 3 are hashed`() {
|
||||||
|
val book = makeBook("book1")
|
||||||
|
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg") }
|
||||||
|
val media = Media(Media.Status.READY, pages = pages)
|
||||||
|
|
||||||
assertThat(media.mediaType).isEqualTo("application/zip")
|
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
|
||||||
assertThat(media.status).isEqualTo(Media.Status.READY)
|
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
|
||||||
assertThat(media.pages).hasSize(1)
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
||||||
fun `given zip encrypted archive when analyzing then media status is ERROR`() {
|
|
||||||
val file = ClassPathResource("archives/zip-encrypted.zip")
|
|
||||||
val book = Book("book", file.url, LocalDateTime.now())
|
|
||||||
|
|
||||||
val media = bookAnalyzer.analyze(book, false)
|
assertThat(hashedMedia.pages).hasSize(30)
|
||||||
|
assertThat(hashedMedia.pages.take(komgaProperties.pageHashing).map { it.fileHash })
|
||||||
|
.hasSize(komgaProperties.pageHashing)
|
||||||
|
.containsOnly("hashed")
|
||||||
|
assertThat(hashedMedia.pages.takeLast(komgaProperties.pageHashing).map { it.fileHash })
|
||||||
|
.hasSize(komgaProperties.pageHashing)
|
||||||
|
.containsOnly("hashed")
|
||||||
|
assertThat(
|
||||||
|
hashedMedia.pages
|
||||||
|
.drop(komgaProperties.pageHashing)
|
||||||
|
.dropLast(komgaProperties.pageHashing)
|
||||||
|
.map { it.fileHash },
|
||||||
|
).hasSize(30 - (komgaProperties.pageHashing * 2))
|
||||||
|
.containsOnly("")
|
||||||
|
}
|
||||||
|
|
||||||
assertThat(media.mediaType).isEqualTo("application/zip")
|
@Test
|
||||||
assertThat(media.status).isEqualTo(Media.Status.ERROR)
|
fun `given book with already hashed pages when hashing then no hashing is done`() {
|
||||||
}
|
val book = makeBook("book1")
|
||||||
|
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg", fileHash = "hashed") }
|
||||||
|
val media = Media(Media.Status.READY, pages = pages)
|
||||||
|
|
||||||
@Test
|
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
||||||
fun `given epub archive when analyzing then media status is READY`() {
|
|
||||||
val file = ClassPathResource("archives/epub3.epub")
|
|
||||||
val book = Book("book", file.url, LocalDateTime.now())
|
|
||||||
|
|
||||||
val media = bookAnalyzer.analyze(book, false)
|
verify(exactly = 0) { bookAnalyzer.getPageContent(any(), any()) }
|
||||||
|
verify(exactly = 0) { bookAnalyzer.hashPage(any(), any()) }
|
||||||
|
|
||||||
assertThat(media.mediaType).isEqualTo("application/epub+zip")
|
assertThat(hashedMedia.pages.map { it.fileHash })
|
||||||
assertThat(media.status).isEqualTo(Media.Status.READY)
|
.hasSize(30)
|
||||||
assertThat(media.pages).hasSize(0)
|
.containsOnly("hashed")
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@ParameterizedTest
|
||||||
fun `given broken epub archive when analyzing then media status is ERROR`() {
|
@MethodSource("provideDirectoriesForPageHashing")
|
||||||
val file = ClassPathResource("archives/zip-as-epub.epub")
|
fun `given 2 exact pages when hashing then hashes are the same`(directory: Path) {
|
||||||
val book = Book("book", file.url, LocalDateTime.now())
|
val files = directory.listDirectoryEntries()
|
||||||
|
assertThat(files).hasSize(2)
|
||||||
|
|
||||||
val media = bookAnalyzer.analyze(book, false)
|
val mediaType = "image/${directory.fileName.extension}"
|
||||||
|
|
||||||
assertThat(media.mediaType).isEqualTo("application/zip")
|
val hashes =
|
||||||
assertThat(media.status).isEqualTo(Media.Status.ERROR)
|
files.map {
|
||||||
assertThat(media.pages).hasSize(0)
|
bookAnalyzer.hashPage(BookPage(it.name, mediaType = mediaType), it.inputStream().readBytes())
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
assertThat(hashes.first()).isEqualTo(hashes.last())
|
||||||
fun `given book with a single page when hashing then all pages are hashed`() {
|
}
|
||||||
val book = makeBook("book1")
|
|
||||||
val pages = listOf(BookPage("1.jpeg", "image/jpeg"))
|
|
||||||
val media = Media(Media.Status.READY, pages = pages)
|
|
||||||
|
|
||||||
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
|
private fun provideDirectoriesForPageHashing() = ClassPathResource("hashpage").uri.toPath().listDirectoryEntries()
|
||||||
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
|
|
||||||
|
|
||||||
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
|
||||||
|
|
||||||
assertThat(hashedMedia.pages).hasSize(1)
|
|
||||||
assertThat(hashedMedia.pages.first().fileHash).isEqualTo("hashed")
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
fun `given book with more than 6 pages when hashing then only first and last 3 are hashed`() {
|
|
||||||
val book = makeBook("book1")
|
|
||||||
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg") }
|
|
||||||
val media = Media(Media.Status.READY, pages = pages)
|
|
||||||
|
|
||||||
every { bookAnalyzer.getPageContent(any(), any()) } returns ByteArray(1)
|
|
||||||
every { bookAnalyzer.hashPage(any(), any()) } returns "hashed"
|
|
||||||
|
|
||||||
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
|
||||||
|
|
||||||
assertThat(hashedMedia.pages).hasSize(30)
|
|
||||||
assertThat(hashedMedia.pages.take(komgaProperties.pageHashing).map { it.fileHash })
|
|
||||||
.hasSize(komgaProperties.pageHashing)
|
|
||||||
.containsOnly("hashed")
|
|
||||||
assertThat(hashedMedia.pages.takeLast(komgaProperties.pageHashing).map { it.fileHash })
|
|
||||||
.hasSize(komgaProperties.pageHashing)
|
|
||||||
.containsOnly("hashed")
|
|
||||||
assertThat(
|
|
||||||
hashedMedia.pages
|
|
||||||
.drop(komgaProperties.pageHashing)
|
|
||||||
.dropLast(komgaProperties.pageHashing)
|
|
||||||
.map { it.fileHash },
|
|
||||||
).hasSize(30 - (komgaProperties.pageHashing * 2))
|
|
||||||
.containsOnly("")
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
|
||||||
fun `given book with already hashed pages when hashing then no hashing is done`() {
|
|
||||||
val book = makeBook("book1")
|
|
||||||
val pages = (1..30).map { BookPage("$it.jpeg", "image/jpeg", fileHash = "hashed") }
|
|
||||||
val media = Media(Media.Status.READY, pages = pages)
|
|
||||||
|
|
||||||
val hashedMedia = bookAnalyzer.hashPages(BookWithMedia(book, media))
|
|
||||||
|
|
||||||
verify(exactly = 0) { bookAnalyzer.getPageContent(any(), any()) }
|
|
||||||
verify(exactly = 0) { bookAnalyzer.hashPage(any(), any()) }
|
|
||||||
|
|
||||||
assertThat(hashedMedia.pages.map { it.fileHash })
|
|
||||||
.hasSize(30)
|
|
||||||
.containsOnly("hashed")
|
|
||||||
}
|
|
||||||
|
|
||||||
@ParameterizedTest
|
|
||||||
@MethodSource("provideDirectoriesForPageHashing")
|
|
||||||
fun `given 2 exact pages when hashing then hashes are the same`(directory: Path) {
|
|
||||||
val files = directory.listDirectoryEntries()
|
|
||||||
assertThat(files).hasSize(2)
|
|
||||||
|
|
||||||
val mediaType = "image/${directory.fileName.extension}"
|
|
||||||
|
|
||||||
val hashes =
|
|
||||||
files.map {
|
|
||||||
bookAnalyzer.hashPage(BookPage(it.name, mediaType = mediaType), it.inputStream().readBytes())
|
|
||||||
}
|
|
||||||
|
|
||||||
assertThat(hashes.first()).isEqualTo(hashes.last())
|
|
||||||
}
|
|
||||||
|
|
||||||
companion object {
|
|
||||||
@JvmStatic
|
|
||||||
fun provideDirectoriesForPageHashing() = ClassPathResource("hashpage").uri.toPath().listDirectoryEntries()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user