5 // Created by Heberti Almeida on 04/05/15.
6 // Copyright (c) 2015 Folio Reader. All rights reserved.
// Reads an epub from disk into the `book` model; conforms to `SSZipArchiveDelegate` so it can react to unzip callbacks.
17 class FREpubParser: NSObject, SSZipArchiveDelegate {
// Base directory used to resolve manifest hrefs; assigned in `readContainer(with:)`.
20 private var resourcesBasePath = ""
// Whether the unzip delegate callback deletes the original epub; set from `readEpub`'s `removeEpub` argument.
21 private var shouldRemoveEpub = true
// Path of the epub that the delegate callback deletes; set from `readEpub`'s `epubPath` argument.
22 private var epubPathToRemove: String?
24 /// Parse the cover image from an epub file.
27 /// - epubPath: Epub path on the disk.
28 /// - unzipPath: Path to unzip the compressed epub.
29 /// - Returns: The book cover as a `UIImage` object.
30 /// - Throws: `FolioReaderError.coverNotAvailable` if the epub cannot be read or has no cover resource; `FolioReaderError.invalidImage(path:)` if the cover file cannot be loaded as an image.
31 func parseCoverImage(_ epubPath: String, unzipPath: String? = nil) throws -> UIImage {
// `try?` collapses any error thrown by `readEpub` into `.coverNotAvailable`; `removeEpub: false` keeps the source epub on disk.
32 guard let book = try? readEpub(epubPath: epubPath, removeEpub: false, unzipPath: unzipPath),
33 let coverImage = book.coverImage else {
34 throw FolioReaderError.coverNotAvailable
// The cover resource's `fullHref` is used as the file path to load the image from.
37 guard let image = UIImage(contentsOfFile: coverImage.fullHref) else {
38 throw FolioReaderError.invalidImage(path: coverImage.fullHref)
44 /// Parse the book title from an epub file.
47 /// - epubPath: Epub path on the disk.
48 /// - unzipPath: Path to unzip the compressed epub.
49 /// - Returns: The book title.
50 /// - Throws: `FolioReaderError.titleNotAvailable` if the epub cannot be read or `book.title` is nil.
51 func parseTitle(_ epubPath: String, unzipPath: String? = nil) throws -> String {
// Any `readEpub` error is swallowed by `try?` and reported as `.titleNotAvailable`; the source epub is not removed.
52 guard let book = try? readEpub(epubPath: epubPath, removeEpub: false, unzipPath: unzipPath), let title = book.title else {
53 throw FolioReaderError.titleNotAvailable
59 /// Parse the book author name from an epub file.
62 /// - epubPath: Epub path on the disk.
63 /// - unzipPath: Path to unzip the compressed epub.
64 /// - Returns: The author name.
65 /// - Throws: `FolioReaderError.authorNameNotAvailable` if the epub cannot be read or `book.authorName` is nil.
66 func parseAuthorName(_ epubPath: String, unzipPath: String? = nil) throws -> String {
// Any `readEpub` error is swallowed by `try?` and reported as `.authorNameNotAvailable`; the source epub is not removed.
67 guard let book = try? readEpub(epubPath: epubPath, removeEpub: false, unzipPath: unzipPath), let authorName = book.authorName else {
68 throw FolioReaderError.authorNameNotAvailable
73 /// Unzip (when needed), optionally delete the original file, and read an epub.
76 /// - withEpubPath: Epub path on the disk
77 /// - removeEpub: Should remove the original file?
78 /// - unzipPath: Path to unzip the compressed epub.
79 /// - Returns: `FRBook` Object
80 /// - Throws: `FolioReaderError.bookNotAvailable` if no file exists at the epub path; also rethrows errors from the backup-attribute, container and OPF steps.
81 func readEpub(epubPath withEpubPath: String, removeEpub: Bool = true, unzipPath: String? = nil) throws -> FRBook {
// Stored on the instance so the SSZipArchive delegate callback can decide whether to delete the epub.
82 epubPathToRemove = withEpubPath
83 shouldRemoveEpub = removeEpub
85 var isDir: ObjCBool = false
86 let fileManager = FileManager.default
87 let bookName = withEpubPath.lastPathComponent
// `unzipPath` is considered only when it already exists on disk; `kApplicationDocumentsDirectory` is the alternative base (presumably the fallback branch).
90 if let path = unzipPath, fileManager.fileExists(atPath: path) {
93 bookBasePath = kApplicationDocumentsDirectory
// The book lives in a folder named after the epub file, inside the chosen base path.
96 bookBasePath = bookBasePath.appendingPathComponent(bookName)
// NOTE(review): this existence check appears to run even when the book is already unzipped, so a book whose epub was deleted by an earlier read would be rejected — confirm this is intended.
98 guard fileManager.fileExists(atPath: withEpubPath) else {
99 throw FolioReaderError.bookNotAvailable
102 // Unzip if necessary
// Unzipping is needed when the destination is missing, or exists but is not a directory.
103 let needsUnzip = !fileManager.fileExists(atPath: bookBasePath, isDirectory:&isDir) || !isDir.boolValue
// NOTE(review): any result returned by `unzipFile` is not used on this line; a failed unzip would presumably surface later as a read error — confirm.
106 SSZipArchive.unzipFile(atPath: withEpubPath, toDestination: bookBasePath, delegate: self)
109 // Skip from backup this folder
110 try addSkipBackupAttributeToItemAtURL(URL(fileURLWithPath: bookBasePath, isDirectory: true))
// container.xml is read first because it sets `book.opfResource`, which `readOpf` then uses.
113 try readContainer(with: bookBasePath)
114 try readOpf(with: bookBasePath)
118 /// Read and parse the container.xml file to locate the .opf package document.
120 /// - Parameter bookBasePath: The base book path
121 /// - Throws: `FolioReaderError.fullPathEmpty` if the rootfile has no `full-path` attribute; rethrows file-read and XML-parse errors.
122 private func readContainer(with bookBasePath: String) throws {
123 let containerPath = "META-INF/container.xml"
124 let containerData = try Data(contentsOf: URL(fileURLWithPath: bookBasePath.appendingPathComponent(containerPath)), options: .alwaysMapped)
125 let xmlDoc = try AEXMLDocument(xml: containerData)
126 let opfResource = FRResource()
// The same `full-path` attribute is looked up twice: assigned to `href` here, then validated by the guard below.
127 opfResource.href = xmlDoc.root["rootfiles"]["rootfile"].attributes["full-path"]
128 guard let fullPath = xmlDoc.root["rootfiles"]["rootfile"].attributes["full-path"] else {
129 throw FolioReaderError.fullPathEmpty
131 opfResource.mediaType = MediaType.by(fileName: fullPath)
132 book.opfResource = opfResource
// Resources are later resolved relative to the directory that contains the .opf file.
133 resourcesBasePath = bookBasePath.appendingPathComponent(book.opfResource.href.deletingLastPathComponent)
136 /// Read and parse the .opf file: manifest resources, metadata, cover image, table of contents and spine.
138 /// - Parameter bookBasePath: The base book path
139 /// - Throws: Rethrows file-read and XML-parse errors for the .opf file.
140 private func readOpf(with bookBasePath: String) throws {
141 let opfPath = bookBasePath.appendingPathComponent(book.opfResource.href)
142 var identifier: String?
144 let opfData = try Data(contentsOf: URL(fileURLWithPath: opfPath), options: .alwaysMapped)
145 let xmlDoc = try AEXMLDocument(xml: opfData)
// The document's first child supplies the `unique-identifier` id and the epub version.
148 if let package = xmlDoc.children.first {
149 identifier = package.attributes["unique-identifier"]
151 if let version = package.attributes["version"] {
// `Double(version)` yields nil for a non-numeric version string.
152 book.version = Double(version)
156 // Parse and save each "manifest item"
157 xmlDoc.root["manifest"]["item"].all?.forEach {
158 let resource = FRResource()
159 resource.id = $0.attributes["id"]
160 resource.properties = $0.attributes["properties"]
161 resource.href = $0.attributes["href"]
// `fullHref` is the href joined to `resourcesBasePath`, with percent-encoding removed.
162 resource.fullHref = resourcesBasePath.appendingPathComponent(resource.href).removingPercentEncoding
// An empty media-type name is passed when the attribute is missing; the href is passed along as the file name.
163 resource.mediaType = MediaType.by(name: $0.attributes["media-type"] ?? "", fileName: resource.href)
164 resource.mediaOverlay = $0.attributes["media-overlay"]
166 // if a .smil file is listed in resources, go parse that file now and save it on book model
167 if (resource.mediaType != nil && resource.mediaType == .smil) {
168 readSmilFile(resource)
171 book.resources.add(resource)
174 book.smils.basePath = resourcesBasePath
177 book.metadata = readMetadata(xmlDoc.root["metadata"].children)
179 // Read the book unique identifier
180 if let identifier = identifier, let uniqueIdentifier = book.metadata.find(identifierById: identifier) {
181 book.uniqueIdentifier = uniqueIdentifier.value
184 // Read the cover image
// Prefer the resource referenced by the `cover` meta entry; otherwise use a resource with the `cover-image` property.
185 let coverImageId = book.metadata.find(byName: "cover")?.content
186 if let coverImageId = coverImageId, let coverResource = book.resources.findById(coverImageId) {
187 book.coverImage = coverResource
188 } else if let coverResource = book.resources.findByProperty("cover-image") {
189 book.coverImage = coverResource
192 // Specific TOC for ePub 2 and 3
193 // Get the first resource with the NCX mediatype
// Lookup order: NCX media type, then the NCX default file extension, then a resource with the `nav` property.
194 if let tocResource = book.resources.findByMediaType(MediaType.ncx) {
195 book.tocResource = tocResource
196 } else if let tocResource = book.resources.findByExtension(MediaType.ncx.defaultExtension) {
197 // Non-standard books may use wrong mediatype, fallback with extension
198 book.tocResource = tocResource
199 } else if let tocResource = book.resources.findByProperty("nav") {
200 book.tocResource = tocResource
// NOTE(review): `precondition` stops the process (release builds included) when no TOC resource was found, rather than throwing from this throwing function — confirm that a crash is intended for books without a TOC.
203 precondition(book.tocResource != nil, "ERROR: Could not find table of contents resource. The book don't have a TOC resource.")
// The flat TOC is derived from `book.tableOfContents`, so it is assigned after it.
206 book.tableOfContents = findTableOfContents()
207 book.flatTableOfContents = flatTOC
210 let spine = xmlDoc.root["spine"]
211 book.spine = readSpine(spine.children)
213 // Page progress direction `ltr` or `rtl`
214 if let pageProgressionDirection = spine.attributes["page-progression-direction"] {
215 book.spine.pageProgressionDirection = pageProgressionDirection
219 /// Reads and parses a .smil file and adds it to `book.smils`.
221 /// - Parameter resource: A `FRResource` pointing at the .smil file to read
222 private func readSmilFile(_ resource: FRResource) {
// This function does not throw, so the `try` calls below are presumably wrapped in a do/catch that is not visible on these lines.
224 let smilData = try Data(contentsOf: URL(fileURLWithPath: resource.fullHref), options: .alwaysMapped)
225 var smilFile = FRSmilFile(resource: resource)
226 let xmlDoc = try AEXMLDocument(xml: smilData)
228 let children = xmlDoc.root["body"].children
// Only a non-empty `<body>` contributes elements to the smil file.
230 if children.count > 0 {
231 smilFile.data.append(contentsOf: readSmilFileElements(children))
234 book.smils.add(smilFile)
// Presumably the error path: a failure is only logged, not reported to the caller — TODO confirm.
236 print("Cannot read .smil file: "+resource.href)
/// Converts the XML elements of a .smil body into `FRSmilElement` objects, recursing into child elements.
///
/// - Parameter children: The XML elements to convert.
/// - Returns: A list of `FRSmilElement` (presumably `data`; the return statement is not visible on these lines).
240 private func readSmilFileElements(_ children: [AEXMLElement]) -> [FRSmilElement] {
241 var data = [FRSmilElement]()
243 // convert each smil element to a FRSmil object
// Each element keeps its tag name and attributes.
245 let smil = FRSmilElement(name: $0.name, attributes: $0.attributes)
247 // if this element has children, convert them to objects too
248 if $0.children.count > 0 {
249 smil.children.append(contentsOf: readSmilFileElements($0.children))
258 /// Read and parse the Table of Contents.
260 /// - Returns: A list of toc references; empty when the book has no TOC resource or no TOC items were found.
261 private func findTableOfContents() -> [FRTocReference] {
262 var tableOfContent = [FRTocReference]()
263 var tocItems: [AEXMLElement]?
264 guard let tocResource = book.tocResource else { return tableOfContent }
265 let tocPath = resourcesBasePath.appendingPathComponent(tocResource.href)
// NCX documents list their entries under `navMap/navPoint`.
268 if tocResource.mediaType == MediaType.ncx {
269 let ncxData = try Data(contentsOf: URL(fileURLWithPath: tocPath), options: .alwaysMapped)
270 let xmlDoc = try AEXMLDocument(xml: ncxData)
271 if let itemsList = xmlDoc.root["navMap"]["navPoint"].all {
// Other TOC resources are read as an HTML nav document: `body/nav` is tried first, then a `<nav>` located by `findNavTag`.
275 let tocData = try Data(contentsOf: URL(fileURLWithPath: tocPath), options: .alwaysMapped)
276 let xmlDoc = try AEXMLDocument(xml: tocData)
278 if let nav = xmlDoc.root["body"]["nav"].first, let itemsList = nav["ol"]["li"].all {
280 } else if let nav = findNavTag(xmlDoc.root["body"]), let itemsList = nav["ol"]["li"].all {
// NOTE(review): presumably the error path for the `try` calls above (this function does not throw) — confirm.
285 print("Cannot find Table of Contents.")
288 guard let items = tocItems else { return tableOfContent }
// Entries that `readTOCReference` cannot convert are skipped.
291 guard let ref = readTOCReference(item) else { continue }
292 tableOfContent.append(ref)
295 return tableOfContent
298 /// Recursively finds a `<nav>` tag on html.
300 /// - Parameter element: An `AEXMLElement`, usually the `<body>`
301 /// - Returns: If found the `<nav>` `AEXMLElement`
// NOTE(review): the recursive step mentioned above is not visible on these lines; given `@discardableResult`, confirm that the result of any nested call is propagated to the caller.
302 @discardableResult func findNavTag(_ element: AEXMLElement) -> AEXMLElement? {
// For each child of `element`, look for a direct `<nav>` child.
303 for element in element.children {
304 if let nav = element["nav"].first {
/// Builds a `FRTocReference` from one TOC entry, including its nested entries.
///
/// - Parameter navpointElement: An NCX `navPoint` element or an HTML nav `li` element, depending on the TOC resource's media type.
/// - Returns: The reference, or nil when the entry has no `src`/`href` attribute.
313 fileprivate func readTOCReference(_ navpointElement: AEXMLElement) -> FRTocReference? {
// NCX branch: label in `navLabel/text`, target in `content@src`, children in nested `navPoint` elements.
316 if book.tocResource?.mediaType == MediaType.ncx {
317 if let labelText = navpointElement["navLabel"]["text"].value {
321 guard let reference = navpointElement["content"].attributes["src"] else { return nil }
// NOTE(review): `split` drops empty pieces by default, so an empty or "#"-only reference makes `hrefSplit[0]` trap, and "#frag" is read as an href rather than a fragment — confirm references always look like "file" or "file#fragment".
322 let hrefSplit = reference.split {$0 == "#"}.map { String($0) }
323 let fragmentID = hrefSplit.count > 1 ? hrefSplit[1] : ""
324 let href = hrefSplit[0]
326 let resource = book.resources.findByHref(href)
327 let toc = FRTocReference(title: label, resource: resource, fragmentID: fragmentID)
329 // Recursively find child
330 if let navPoints = navpointElement["navPoint"].all {
331 for navPoint in navPoints {
332 guard let item = readTOCReference(navPoint) else { continue }
333 toc.children.append(item)
// HTML nav branch (presumably the `else` of the NCX check): label and target come from the `<a>` element, children from `ol/li`.
338 if let labelText = navpointElement["a"].value {
342 guard let reference = navpointElement["a"].attributes["href"] else { return nil }
// NOTE(review): same `split`/`hrefSplit[0]` caveat as in the NCX branch.
343 let hrefSplit = reference.split {$0 == "#"}.map { String($0) }
344 let fragmentID = hrefSplit.count > 1 ? hrefSplit[1] : ""
345 let href = hrefSplit[0]
347 let resource = book.resources.findByHref(href)
348 let toc = FRTocReference(title: label, resource: resource, fragmentID: fragmentID)
350 // Recursively find child
351 if let navPoints = navpointElement["ol"]["li"].all {
352 for navPoint in navPoints {
353 guard let item = readTOCReference(navPoint) else { continue }
354 toc.children.append(item)
361 // MARK: - Recursive add items to a list
/// The table of contents flattened into one list: each top-level item is followed by the items `countTocChild` returns for it.
363 var flatTOC: [FRTocReference] {
364 var tocItems = [FRTocReference]()
366 for item in book.tableOfContents {
// Parent first, then whatever `countTocChild` collects for it.
367 tocItems.append(item)
368 tocItems.append(contentsOf: countTocChild(item))
/// Collects TOC references by visiting the children of `item`.
///
/// - Parameter item: The TOC reference whose children are visited.
/// - Returns: A list of `FRTocReference` — presumably the descendants of `item`; the loop body is not visible on these lines, TODO confirm.
373 func countTocChild(_ item: FRTocReference) -> [FRTocReference] {
374 var tocItems = [FRTocReference]()
376 item.children.forEach {
382 /// Read and parse <metadata>: Dublin Core (`dc:*`) tags and `meta` tags are copied into a `FRMetadata`.
384 /// - Parameter tags: XHTML tags
385 /// - Returns: Metadata object
386 fileprivate func readMetadata(_ tags: [AEXMLElement]) -> FRMetadata {
387 let metadata = FRMetadata()
// Missing text values are stored as empty strings for the list-valued fields below.
390 if tag.name == "dc:title" {
391 metadata.titles.append(tag.value ?? "")
394 if tag.name == "dc:identifier" {
395 let identifier = Identifier(id: tag.attributes["id"], scheme: tag.attributes["opf:scheme"], value: tag.value)
396 metadata.identifiers.append(identifier)
399 if tag.name == "dc:language" {
// A missing value, or the value "en", leaves `metadata.language` unchanged.
400 let language = tag.value ?? metadata.language
401 metadata.language = language != "en" ? language : metadata.language
404 if tag.name == "dc:creator" {
405 metadata.creators.append(Author(name: tag.value ?? "", role: tag.attributes["opf:role"] ?? "", fileAs: tag.attributes["opf:file-as"] ?? ""))
408 if tag.name == "dc:contributor" {
// NOTE(review): contributors are appended to the same `creators` list as `dc:creator` — confirm whether a separate contributors list was intended.
409 metadata.creators.append(Author(name: tag.value ?? "", role: tag.attributes["opf:role"] ?? "", fileAs: tag.attributes["opf:file-as"] ?? ""))
412 if tag.name == "dc:publisher" {
413 metadata.publishers.append(tag.value ?? "")
416 if tag.name == "dc:description" {
417 metadata.descriptions.append(tag.value ?? "")
420 if tag.name == "dc:subject" {
421 metadata.subjects.append(tag.value ?? "")
424 if tag.name == "dc:rights" {
425 metadata.rights.append(tag.value ?? "")
428 if tag.name == "dc:date" {
429 metadata.dates.append(EventDate(date: tag.value ?? "", event: tag.attributes["opf:event"] ?? ""))
432 if tag.name == "meta" {
// `meta` tags with a `name` attribute are stored as name/content pairs.
433 if tag.attributes["name"] != nil {
434 metadata.metaAttributes.append(Meta(name: tag.attributes["name"], content: tag.attributes["content"]))
// `meta` tags with both `property` and `id` are stored with their id.
437 if tag.attributes["property"] != nil && tag.attributes["id"] != nil {
438 metadata.metaAttributes.append(Meta(id: tag.attributes["id"], property: tag.attributes["property"], value: tag.value))
// NOTE(review): a tag with both `property` and `id` also satisfies this check, so it would be appended twice if the two checks are independent `if`s — confirm.
441 if tag.attributes["property"] != nil {
442 metadata.metaAttributes.append(Meta(property: tag.attributes["property"], value: tag.value, refines: tag.attributes["refines"]))
449 /// Read and parse <spine>: each tag with an `idref` that matches a known resource becomes a spine reference.
451 /// - Parameter tags: XHTML tags
452 /// - Returns: Spine object
453 fileprivate func readSpine(_ tags: [AEXMLElement]) -> FRSpine {
454 let spine = FRSpine()
// Tags without an `idref` attribute are skipped.
457 guard let idref = tag.attributes["idref"] else { continue }
// When a `linear` attribute is present, only the value "yes" counts as linear.
460 if tag.attributes["linear"] != nil {
461 linear = tag.attributes["linear"] == "yes" ? true : false
// Only idrefs that resolve to a resource in `book.resources` produce a spine reference.
464 if book.resources.containsById(idref) {
465 guard let resource = book.resources.findById(idref) else { continue }
466 spine.spineReferences.append(Spine(resource: resource, linear: linear))
472 /// Skip a file from iCloud backup by setting its `isExcludedFromBackup` resource value.
474 /// - Parameter url: File URL
475 /// - Throws: Error if the resource value cannot be set
476 fileprivate func addSkipBackupAttributeToItemAtURL(_ url: URL) throws {
// Debug-only check: `assert` is not evaluated in release builds, so a missing file there surfaces through the throwing call below.
477 assert(FileManager.default.fileExists(atPath: url.path))
// `setResourceValues` is called on a mutable copy because the `url` parameter is immutable.
479 var urlToExclude = url
480 var resourceValues = URLResourceValues()
481 resourceValues.isExcludedFromBackup = true
482 try urlToExclude.setResourceValues(resourceValues)
485 // MARK: - SSZipArchive delegate
/// Delegate callback that deletes the original epub when `shouldRemoveEpub` is true and a path was recorded by `readEpub`.
///
/// - Parameters:
///   - path: Path of the archive (not used here).
///   - zipInfo: Archive info (not used here).
487 func zipArchiveWillUnzipArchive(atPath path: String, zipInfo: unz_global_info) {
488 guard shouldRemoveEpub else { return }
489 guard let epubPathToRemove = epubPathToRemove else { return }
// Best effort: `try?` ignores a failed removal.
490 try? FileManager.default.removeItem(atPath: epubPathToRemove)