//
//  FREpubParser.swift
//  FolioReaderKit
//
//  Created by Heberti Almeida on 04/05/15.
//  Copyright (c) 2015 Folio Reader. All rights reserved.
//

import UIKit
import AEXML
#if COCOAPODS
import SSZipArchive
#else
import ZipArchive
#endif

/// Unzips an epub archive and parses its container, OPF and table of contents into an `FRBook`.
class FREpubParser: NSObject, SSZipArchiveDelegate {

    /// The book model filled in by the `read…` methods of this parser.
    let book = FRBook()
    /// Directory that contains the OPF file; manifest and TOC hrefs are resolved against it.
    private var resourcesBasePath = ""
    // The two values below are recorded by `readEpub`.
    // NOTE(review): presumably consumed by the SSZipArchive delegate callbacks, which are
    // not visible in this part of the file — confirm.
    private var shouldRemoveEpub = true
    private var epubPathToRemove: String?

    /// Parse the Cover Image from an epub file.
    ///
    /// - Parameters:
    ///   - epubPath: Epub path on the disk.
    ///   - unzipPath: Path to unzip the compressed epub.
    /// - Returns: The book cover as UIImage object
    /// - Throws: `FolioReaderError.coverNotAvailable` when the epub cannot be read or declares no cover,
    ///   `FolioReaderError.invalidImage` when the cover file cannot be loaded as an image.
    func parseCoverImage(_ epubPath: String, unzipPath: String? = nil) throws -> UIImage {
        // Any failure while reading the epub is reported as "cover not available".
        guard let book = try? readEpub(epubPath: epubPath, removeEpub: false, unzipPath: unzipPath),
            let coverImage = book.coverImage else {
                throw FolioReaderError.coverNotAvailable
        }

        guard let image = UIImage(contentsOfFile: coverImage.fullHref) else {
            throw FolioReaderError.invalidImage(path: coverImage.fullHref)
        }

        return image
    }

    /// Parse the book title from an epub file.
    ///
    /// - Parameters:
    ///   - epubPath: Epub path on the disk.
    ///   - unzipPath: Path to unzip the compressed epub.
    /// - Returns: The book title
    /// - Throws: `FolioReaderError.titleNotAvailable` when the epub cannot be read or has no title.
    func parseTitle(_ epubPath: String, unzipPath: String? = nil) throws -> String {
        guard let book = try? readEpub(epubPath: epubPath, removeEpub: false, unzipPath: unzipPath),
            let title = book.title else {
                throw FolioReaderError.titleNotAvailable
        }

        return title
    }

    /// Parse the book Author name from an epub file.
    ///
    /// - Parameters:
    ///   - epubPath: Epub path on the disk.
    ///   - unzipPath: Path to unzip the compressed epub.
    /// - Returns: The author name
    /// - Throws: `FolioReaderError.authorNameNotAvailable` when the epub cannot be read or has no author name.
    func parseAuthorName(_ epubPath: String, unzipPath: String? = nil) throws -> String {
        guard let book = try? readEpub(epubPath: epubPath, removeEpub: false, unzipPath: unzipPath),
            let authorName = book.authorName else {
                throw FolioReaderError.authorNameNotAvailable
        }

        return authorName
    }

    /// Unzip, delete and read an epub file.
    ///
    /// The archive is extracted into `<base>/<epub file name>`, where `<base>` is `unzipPath`
    /// when that path exists and the application documents directory otherwise. Extraction is
    /// skipped when that folder already exists.
    ///
    /// - Parameters:
    ///   - withEpubPath: Epub path on the disk
    ///   - removeEpub: Should remove the original file?
    ///   - unzipPath: Path to unzip the compressed epub.
    /// - Returns: `FRBook` Object
    /// - Throws: `FolioReaderError.bookNotAvailable` when the epub file does not exist or cannot be
    ///   unzipped; any error raised while excluding the folder from backup or while reading the
    ///   container and OPF files is propagated unchanged.
    func readEpub(epubPath withEpubPath: String, removeEpub: Bool = true, unzipPath: String? = nil) throws -> FRBook {
        epubPathToRemove = withEpubPath
        shouldRemoveEpub = removeEpub

        let fileManager = FileManager.default
        let bookName = withEpubPath.lastPathComponent

        // Prefer the caller supplied directory, but only when it actually exists.
        let baseDirectory: String
        if let path = unzipPath, fileManager.fileExists(atPath: path) {
            baseDirectory = path
        } else {
            baseDirectory = kApplicationDocumentsDirectory
        }
        let bookBasePath = baseDirectory.appendingPathComponent(bookName)

        guard fileManager.fileExists(atPath: withEpubPath) else {
            throw FolioReaderError.bookNotAvailable
        }

        // Unzip if necessary
        var isDir: ObjCBool = false
        let needsUnzip = !fileManager.fileExists(atPath: bookBasePath, isDirectory: &isDir) || !isDir.boolValue

        if needsUnzip {
            // A failed extraction would otherwise leave a missing or partial book folder for the
            // parsing steps below, so report it right away. The destination is intentionally not
            // deleted here: it can be a pre-existing file (even the epub itself).
            let didUnzip = SSZipArchive.unzipFile(atPath: withEpubPath, toDestination: bookBasePath, delegate: self)
            guard didUnzip else {
                throw FolioReaderError.bookNotAvailable
            }
        }

        // Skip from backup this folder
        try addSkipBackupAttributeToItemAtURL(URL(fileURLWithPath: bookBasePath, isDirectory: true))

        book.name = bookName
        try readContainer(with: bookBasePath)
        try readOpf(with: bookBasePath)
        return self.book
    }

    /// Read and parse container.xml file.
///
    /// Reads `META-INF/container.xml`, stores the OPF resource it points to on the book and
    /// derives the base path used to resolve the other resources.
    ///
    /// - Parameter bookBasePath: The base book path
    /// - Throws: `FolioReaderError.fullPathEmpty` when the rootfile has no `full-path` attribute;
    ///   file reading and XML parsing errors are propagated.
    private func readContainer(with bookBasePath: String) throws {
        let containerURL = URL(fileURLWithPath: bookBasePath.appendingPathComponent("META-INF/container.xml"))
        let containerData = try Data(contentsOf: containerURL, options: .alwaysMapped)
        let xmlDoc = try AEXMLDocument(xml: containerData)
        let rootfile = xmlDoc.root["rootfiles"]["rootfile"]

        // Without a full-path there is no OPF to read.
        guard let fullPath = rootfile.attributes["full-path"] else {
            throw FolioReaderError.fullPathEmpty
        }

        let opfResource = FRResource()
        opfResource.href = fullPath
        opfResource.mediaType = MediaType.by(fileName: fullPath)
        book.opfResource = opfResource

        // Every other resource is located relative to the folder that holds the OPF file.
        resourcesBasePath = bookBasePath.appendingPathComponent(book.opfResource.href.deletingLastPathComponent)
    }

    /// Read and parse .opf file.
    ///
    /// Fills the book with its version, manifest resources (parsing `.smil` files on the way),
    /// metadata, unique identifier, cover image, table of contents and spine.
    ///
    /// - Parameter bookBasePath: The base book path
    /// - Throws: Errors raised while reading the OPF file or parsing its XML.
    private func readOpf(with bookBasePath: String) throws {
        let opfURL = URL(fileURLWithPath: bookBasePath.appendingPathComponent(book.opfResource.href))
        let opfData = try Data(contentsOf: opfURL, options: .alwaysMapped)
        let xmlDoc = try AEXMLDocument(xml: opfData)

        // Base OPF info
        let package = xmlDoc.children.first
        let identifier = package?.attributes["unique-identifier"]
        if let version = package?.attributes["version"] {
            book.version = Double(version)
        }

        // Parse and save each "manifest item"
        for item in xmlDoc.root["manifest"]["item"].all ?? [] {
            let resource = FRResource()
            resource.id = item.attributes["id"]
            resource.properties = item.attributes["properties"]
            resource.href = item.attributes["href"]
            resource.fullHref = resourcesBasePath.appendingPathComponent(resource.href).removingPercentEncoding
            resource.mediaType = MediaType.by(name: item.attributes["media-type"] ?? "", fileName: resource.href)
            resource.mediaOverlay = item.attributes["media-overlay"]

            // if a .smil file is listed in resources, go parse that file now and save it on book model
            if resource.mediaType != nil && resource.mediaType == .smil {
                readSmilFile(resource)
            }

            book.resources.add(resource)
        }

        book.smils.basePath = resourcesBasePath

        // Read metadata
        book.metadata = readMetadata(xmlDoc.root["metadata"].children)

        // Read the book unique identifier
        if let identifier = identifier, let uniqueIdentifier = book.metadata.find(identifierById: identifier) {
            book.uniqueIdentifier = uniqueIdentifier.value
        }

        // Read the cover image: the "cover" metadata entry first, then the "cover-image" property
        if let coverImageId = book.metadata.find(byName: "cover")?.content,
            let coverResource = book.resources.findById(coverImageId) {
            book.coverImage = coverResource
        } else if let coverResource = book.resources.findByProperty("cover-image") {
            book.coverImage = coverResource
        }

        // Specific TOC for ePub 2 and 3
        // Get the first resource with the NCX mediatype; non-standard books may use a wrong
        // mediatype, so fall back to the NCX extension and finally to the "nav" property.
        let tocCandidate = book.resources.findByMediaType(MediaType.ncx)
            ?? book.resources.findByExtension(MediaType.ncx.defaultExtension)
            ?? book.resources.findByProperty("nav")
        if let tocResource = tocCandidate {
            book.tocResource = tocResource
        }

        // NOTE(review): this stops the process for a book without any TOC resource — confirm
        // that crashing (rather than throwing) is intended for malformed books.
        precondition(book.tocResource != nil, "ERROR: Could not find table of contents resource. The book don't have a TOC resource.")

        // The book TOC
        book.tableOfContents = findTableOfContents()
        book.flatTableOfContents = flatTOC

        // Read Spine
        let spine = xmlDoc.root["spine"]
        book.spine = readSpine(spine.children)

        // Page progress direction `ltr` or `rtl`
        if let pageProgressionDirection = spine.attributes["page-progression-direction"] {
            book.spine.pageProgressionDirection = pageProgressionDirection
        }
    }

    /// Reads and parses a .smil file.
///
    /// The parsed file is added to `book.smils`. A file that cannot be read or parsed is
    /// skipped after printing a message; no error is propagated.
    ///
    /// - Parameter resource: A `FRResource` to read the smill
    private func readSmilFile(_ resource: FRResource) {
        do {
            let smilURL = URL(fileURLWithPath: resource.fullHref)
            let smilData = try Data(contentsOf: smilURL, options: .alwaysMapped)
            var smilFile = FRSmilFile(resource: resource)
            let xmlDoc = try AEXMLDocument(xml: smilData)
            let bodyElements = xmlDoc.root["body"].children

            if !bodyElements.isEmpty {
                smilFile.data.append(contentsOf: readSmilFileElements(bodyElements))
            }

            book.smils.add(smilFile)
        } catch {
            print("Cannot read .smil file: "+resource.href)
        }
    }

    /// Converts XML elements, and recursively their children, into `FRSmilElement` objects.
    ///
    /// - Parameter children: The XML elements to convert
    /// - Returns: One `FRSmilElement` per input element, in the same order
    private func readSmilFileElements(_ children: [AEXMLElement]) -> [FRSmilElement] {
        return children.map { element -> FRSmilElement in
            let smil = FRSmilElement(name: element.name, attributes: element.attributes)

            // nested elements are converted the same way and attached to their parent
            if !element.children.isEmpty {
                smil.children.append(contentsOf: readSmilFileElements(element.children))
            }

            return smil
        }
    }

    /// Read and parse the Table of Contents.
    ///
    /// Reads the book's TOC resource: `navMap/navPoint` entries for an NCX file, otherwise the
    /// `ol/li` entries of a `nav` element. Returns an empty list when the book has no TOC
    /// resource or the file cannot be read or parsed.
    ///
    /// - Returns: A list of toc references
    private func findTableOfContents() -> [FRTocReference] {
        guard let tocResource = book.tocResource else {
            return []
        }

        let tocPath = resourcesBasePath.appendingPathComponent(tocResource.href)
        var tocItems: [AEXMLElement]?

        do {
            // Both TOC flavours start by loading and parsing the same file.
            let tocData = try Data(contentsOf: URL(fileURLWithPath: tocPath), options: .alwaysMapped)
            let xmlDoc = try AEXMLDocument(xml: tocData)

            if tocResource.mediaType == MediaType.ncx {
                tocItems = xmlDoc.root["navMap"]["navPoint"].all
            } else if let nav = xmlDoc.root["body"]["nav"].first, let itemsList = nav["ol"]["li"].all {
                tocItems = itemsList
            } else if let nav = findNavTag(xmlDoc.root["body"]), let itemsList = nav["ol"]["li"].all {
                // no usable `nav` directly under body, so use the one located by `findNavTag`
                tocItems = itemsList
            }
        } catch {
            print("Cannot find Table of Contents.")
        }

        // Entries that cannot be turned into a reference are left out.
        var tableOfContent = [FRTocReference]()
        for item in tocItems ?? [] {
            if let reference = readTOCReference(item) {
                tableOfContent.append(reference)
            }
        }
        return tableOfContent
    }

    /// Recursively finds a `