perf: faster zip entry extraction

This commit is contained in:
Snd-R 2024-08-26 05:58:04 +03:00 committed by GitHub
parent 6794e2490c
commit eeb5898210
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 45 additions and 15 deletions

View File

@ -84,7 +84,7 @@ dependencies {
implementation("com.appmattus.crypto:cryptohash:0.10.1")
implementation("org.apache.tika:tika-core:2.9.1")
implementation("org.apache.commons:commons-compress:1.25.0")
implementation("org.apache.commons:commons-compress:1.27.1")
implementation("com.github.junrar:junrar:7.5.5")
implementation("com.github.gotson.nightcompress:nightcompress:0.2.0")
implementation("org.apache.pdfbox:pdfbox:3.0.1")

View File

@ -8,6 +8,8 @@ import org.gotson.komga.domain.model.MediaContainerEntry
import org.gotson.komga.domain.model.MediaType
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
import org.gotson.komga.infrastructure.util.getZipEntryBytes
import org.gotson.komga.infrastructure.util.use
import org.springframework.stereotype.Service
import java.nio.file.Path
@ -26,7 +28,7 @@ class ZipExtractor(
path: Path,
analyzeDimensions: Boolean,
): List<MediaContainerEntry> =
ZipFile(path.toFile()).use { zip ->
ZipFile.builder().setPath(path).use { zip ->
zip.entries.toList()
.filter { !it.isDirectory }
.map { entry ->
@ -52,8 +54,5 @@ class ZipExtractor(
override fun getEntryStream(
path: Path,
entryName: String,
): ByteArray =
ZipFile(path.toFile()).use { zip ->
zip.getInputStream(zip.getEntry(entryName)).use { it.readBytes() }
}
): ByteArray = getZipEntryBytes(path, entryName)
}

View File

@ -2,6 +2,7 @@ package org.gotson.komga.infrastructure.mediacontainer.epub
import org.apache.commons.compress.archivers.zip.ZipFile
import org.gotson.komga.domain.model.MediaUnsupportedException
import org.gotson.komga.infrastructure.util.use
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.parser.Parser
@ -16,7 +17,7 @@ data class EpubPackage(
)
inline fun <R> Path.epub(block: (EpubPackage) -> R): R =
ZipFile(this.toFile()).use { zip ->
ZipFile.builder().setPath(this).use { zip ->
val opfFile = zip.getPackagePath()
val opfDoc = zip.getInputStream(zip.getEntry(opfFile)).use { Jsoup.parse(it, null, "", Parser.xmlParser()) }
val opfDir = Paths.get(opfFile).parent
@ -30,7 +31,7 @@ fun ZipFile.getPackagePath(): String =
}
fun getPackageFile(path: Path): String? =
ZipFile(path.toFile()).use { zip ->
ZipFile.builder().setPath(path).use { zip ->
try {
zip.getInputStream(zip.getEntry(zip.getPackagePath())).reader().use { it.readText() }
} catch (e: Exception) {

View File

@ -2,15 +2,14 @@ package org.gotson.komga.infrastructure.mediacontainer.epub
import io.github.oshai.kotlinlogging.KotlinLogging
import org.apache.commons.compress.archivers.ArchiveEntry
import org.apache.commons.compress.archivers.zip.ZipFile
import org.gotson.komga.domain.model.BookPage
import org.gotson.komga.domain.model.EntryNotFoundException
import org.gotson.komga.domain.model.EpubTocEntry
import org.gotson.komga.domain.model.MediaFile
import org.gotson.komga.domain.model.R2Locator
import org.gotson.komga.domain.model.TypedBytes
import org.gotson.komga.infrastructure.image.ImageAnalyzer
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
import org.gotson.komga.infrastructure.util.getZipEntryBytes
import org.jsoup.Jsoup
import org.springframework.beans.factory.annotation.Value
import org.springframework.stereotype.Service
@ -35,11 +34,7 @@ class EpubExtractor(
fun getEntryStream(
path: Path,
entryName: String,
): ByteArray =
ZipFile(path.toFile()).use { zip ->
zip.getEntry(entryName)?.let { entry -> zip.getInputStream(entry).use { it.readBytes() } }
?: throw EntryNotFoundException("Entry does not exist: $entryName")
}
): ByteArray = getZipEntryBytes(path, entryName)
fun isEpub(path: Path): Boolean =
try {

View File

@ -0,0 +1,35 @@
package org.gotson.komga.infrastructure.util
import org.apache.commons.compress.archivers.zip.ZipFile
import org.gotson.komga.domain.model.EntryNotFoundException
import java.nio.file.Path
/**
 * Opens the [ZipFile] described by this builder, passes it to [block], and closes the
 * archive once [block] has finished (normally or with an exception).
 *
 * @param block operation to run against the opened archive
 * @return the value produced by [block]
 */
inline fun <R> ZipFile.Builder.use(block: (ZipFile) -> R): R {
  val zipFile = get()
  return zipFile.use { opened -> block(opened) }
}
/**
 * Reads the complete content of the entry named [entryName] from the zip archive at [path].
 *
 * The lookup is attempted twice: first with local file headers ignored, then, if the entry
 * was not found, again with local file headers taken into account.
 *
 * @param path location of the zip archive
 * @param entryName name of the entry to read
 * @return the bytes of the entry
 * @throws EntryNotFoundException if neither attempt finds an entry with that name
 */
fun getZipEntryBytes(
  path: Path,
  entryName: String,
): ByteArray {
  // Fast path: only the central directory record is read and searched for the entry.
  val centralDirectoryOnly =
    ZipFile.builder()
      .setPath(path)
      .setUseUnicodeExtraFields(true)
      .setIgnoreLocalFileHeader(true)

  centralDirectoryOnly
    .use { zip -> zip.getEntryBytes(entryName) }
    ?.let { found -> return found }

  // Slow path: the name was not in the central directory record.
  // Each entry is iterated and, if present, its name is set from the Unicode extra field
  // of the local file header.
  val withLocalHeaders = centralDirectoryOnly.setIgnoreLocalFileHeader(false)
  return withLocalHeaders.use { zip ->
    val content = zip.getEntryBytes(entryName)
    content ?: throw EntryNotFoundException("Entry does not exist: $entryName")
  }
}
/**
 * Reads the complete content of the entry named [entryName] from this archive.
 *
 * The archive itself is NOT closed here: it belongs to the caller, which is expected to
 * manage its lifetime (for example through `ZipFile.Builder.use`). Only the entry's input
 * stream, which is opened by this function, is closed by it.
 *
 * @param entryName name of the entry to read
 * @return the bytes of the entry, or `null` if [ZipFile.getEntry] returns no entry for that name
 */
private fun ZipFile.getEntryBytes(entryName: String): ByteArray? =
  getEntry(entryName)?.let { entry ->
    // The stream is created here, so it is closed here.
    getInputStream(entry).use { stream -> stream.readBytes() }
  }