iOS/Pods/FolioReaderKit/Source/EPUBCore/FREpubParser.swift

   1 //
   2 //  FREpubParser.swift
   3 //  FolioReaderKit
   4 //
   5 //  Created by Heberti Almeida on 04/05/15.
   6 //  Copyright (c) 2015 Folio Reader. All rights reserved.
   7 //
   8
   9 import UIKit
  10 import AEXML
  11 #if COCOAPODS
  12 import SSZipArchive
  13 #else
  14 import ZipArchive
  15 #endif
  16
  17 class FREpubParser: NSObject, SSZipArchiveDelegate {
  18
  19     let book = FRBook()
  20     private var resourcesBasePath = ""
  21     private var shouldRemoveEpub = true
  22     private var epubPathToRemove: String?
  23
  24     /// Parse the Cover Image from an epub file.
  25     ///
  26     /// - Parameters:
  27     ///   - epubPath: Epub path on the disk.
  28     ///   - unzipPath: Path to unzip the compressed epub.
  29     /// - Returns: The book cover as UIImage object
  30     /// - Throws: `FolioReaderError`
  31     func parseCoverImage(_ epubPath: String, unzipPath: String? = nil) throws -> UIImage {
  32         guard let book = try? readEpub(epubPath: epubPath, removeEpub: false, unzipPath: unzipPath),
  33             let coverImage = book.coverImage else {
  34                 throw FolioReaderError.coverNotAvailable
  35         }
  36
  37         guard let image = UIImage(contentsOfFile: coverImage.fullHref) else {
  38             throw FolioReaderError.invalidImage(path: coverImage.fullHref)
  39         }
  40
  41         return image
  42     }
  43
  44     /// Parse the book title from an epub file.
  45     ///
  46     /// - Parameters:
  47     ///   - epubPath: Epub path on the disk.
  48     ///   - unzipPath: Path to unzip the compressed epub.
  49     /// - Returns: The book title
  50     /// - Throws: `FolioReaderError`
  51     func parseTitle(_ epubPath: String, unzipPath: String? = nil) throws -> String {
  52         guard let book = try? readEpub(epubPath: epubPath, removeEpub: false, unzipPath: unzipPath), let title = book.title else {
  53              throw FolioReaderError.titleNotAvailable
  54         }
  55         return title
  56     }
  57
  58
  59     /// Parse the book Author name from an epub file.
  60     ///
  61     /// - Parameters:
  62     ///   - epubPath: Epub path on the disk.
  63     ///   - unzipPath: Path to unzip the compressed epub.
  64     /// - Returns: The author name
  65     /// - Throws: `FolioReaderError`
  66     func parseAuthorName(_ epubPath: String, unzipPath: String? = nil) throws -> String {
  67         guard let book = try? readEpub(epubPath: epubPath, removeEpub: false, unzipPath: unzipPath), let authorName = book.authorName else {
  68             throw FolioReaderError.authorNameNotAvailable
  69         }
  70         return authorName
  71     }
  72
  73     /// Unzip, delete and read an epub file.
  74     ///
  75     /// - Parameters:
  76     ///   - withEpubPath: Epub path on the disk
  77     ///   - removeEpub: Should remove the original file?
  78     ///   - unzipPath: Path to unzip the compressed epub.
  79     /// - Returns: `FRBook` Object
  80     /// - Throws: `FolioReaderError`
  81     func readEpub(epubPath withEpubPath: String, removeEpub: Bool = true, unzipPath: String? = nil) throws -> FRBook {
  82         epubPathToRemove = withEpubPath
  83         shouldRemoveEpub = removeEpub
  84
  85         var isDir: ObjCBool = false
  86         let fileManager = FileManager.default
  87         let bookName = withEpubPath.lastPathComponent
  88         var bookBasePath = ""
  89
  90         if let path = unzipPath, fileManager.fileExists(atPath: path) {
  91             bookBasePath = path
  92         } else {
  93             bookBasePath = kApplicationDocumentsDirectory
  94         }
  95
  96         bookBasePath = bookBasePath.appendingPathComponent(bookName)
  97
  98         guard fileManager.fileExists(atPath: withEpubPath) else {
  99             throw FolioReaderError.bookNotAvailable
 100         }
 101
 102         // Unzip if necessary
 103         let needsUnzip = !fileManager.fileExists(atPath: bookBasePath, isDirectory:&isDir) || !isDir.boolValue
 104
 105         if needsUnzip {
 106             SSZipArchive.unzipFile(atPath: withEpubPath, toDestination: bookBasePath, delegate: self)
 107         }
 108
 109         // Skip from backup this folder
 110         try addSkipBackupAttributeToItemAtURL(URL(fileURLWithPath: bookBasePath, isDirectory: true))
 111
 112         book.name = bookName
 113         try readContainer(with: bookBasePath)
 114         try readOpf(with: bookBasePath)
 115         return self.book
 116     }
 117
 118     /// Read and parse container.xml file.
 119     ///
 120     /// - Parameter bookBasePath: The base book path
 121     /// - Throws: `FolioReaderError`
 122     private func readContainer(with bookBasePath: String) throws {
 123         let containerPath = "META-INF/container.xml"
 124         let containerData = try Data(contentsOf: URL(fileURLWithPath: bookBasePath.appendingPathComponent(containerPath)), options: .alwaysMapped)
 125         let xmlDoc = try AEXMLDocument(xml: containerData)
 126         let opfResource = FRResource()
 127         opfResource.href = xmlDoc.root["rootfiles"]["rootfile"].attributes["full-path"]
 128         guard let fullPath = xmlDoc.root["rootfiles"]["rootfile"].attributes["full-path"] else {
 129             throw FolioReaderError.fullPathEmpty
 130         }
 131         opfResource.mediaType = MediaType.by(fileName: fullPath)
 132         book.opfResource = opfResource
 133         resourcesBasePath = bookBasePath.appendingPathComponent(book.opfResource.href.deletingLastPathComponent)
 134     }
 135
 136     /// Read and parse .opf file.
 137     ///
 138     /// - Parameter bookBasePath: The base book path
 139     /// - Throws: `FolioReaderError`
 140     private func readOpf(with bookBasePath: String) throws {
 141         let opfPath = bookBasePath.appendingPathComponent(book.opfResource.href)
 142         var identifier: String?
 143
 144         let opfData = try Data(contentsOf: URL(fileURLWithPath: opfPath), options: .alwaysMapped)
 145         let xmlDoc = try AEXMLDocument(xml: opfData)
 146
 147         // Base OPF info
 148         if let package = xmlDoc.children.first {
 149             identifier = package.attributes["unique-identifier"]
 150
 151             if let version = package.attributes["version"] {
 152                 book.version = Double(version)
 153             }
 154         }
 155
 156         // Parse and save each "manifest item"
 157         xmlDoc.root["manifest"]["item"].all?.forEach {
 158             let resource = FRResource()
 159             resource.id = $0.attributes["id"]
 160             resource.properties = $0.attributes["properties"]
 161             resource.href = $0.attributes["href"]
 162             resource.fullHref = resourcesBasePath.appendingPathComponent(resource.href).removingPercentEncoding
 163             resource.mediaType = MediaType.by(name: $0.attributes["media-type"] ?? "", fileName: resource.href)
 164             resource.mediaOverlay = $0.attributes["media-overlay"]
 165
 166             // if a .smil file is listed in resources, go parse that file now and save it on book model
 167             if (resource.mediaType != nil && resource.mediaType == .smil) {
 168                 readSmilFile(resource)
 169             }
 170
 171             book.resources.add(resource)
 172         }
 173
 174         book.smils.basePath = resourcesBasePath
 175
 176         // Read metadata
 177         book.metadata = readMetadata(xmlDoc.root["metadata"].children)
 178
 179         // Read the book unique identifier
 180         if let identifier = identifier, let uniqueIdentifier = book.metadata.find(identifierById: identifier) {
 181             book.uniqueIdentifier = uniqueIdentifier.value
 182         }
 183
 184         // Read the cover image
 185         let coverImageId = book.metadata.find(byName: "cover")?.content
 186         if let coverImageId = coverImageId, let coverResource = book.resources.findById(coverImageId) {
 187             book.coverImage = coverResource
 188         } else if let coverResource = book.resources.findByProperty("cover-image") {
 189             book.coverImage = coverResource
 190         }
 191
 192         // Specific TOC for ePub 2 and 3
 193         // Get the first resource with the NCX mediatype
 194         if let tocResource = book.resources.findByMediaType(MediaType.ncx) {
 195             book.tocResource = tocResource
 196         } else if let tocResource = book.resources.findByExtension(MediaType.ncx.defaultExtension) {
 197             // Non-standard books may use wrong mediatype, fallback with extension
 198             book.tocResource = tocResource
 199         } else if let tocResource = book.resources.findByProperty("nav") {
 200             book.tocResource = tocResource
 201         }
 202
 203         precondition(book.tocResource != nil, "ERROR: Could not find table of contents resource. The book don't have a TOC resource.")
 204
 205         // The book TOC
 206         book.tableOfContents = findTableOfContents()
 207         book.flatTableOfContents = flatTOC
 208
 209         // Read Spine
 210         let spine = xmlDoc.root["spine"]
 211         book.spine = readSpine(spine.children)
 212
 213         // Page progress direction `ltr` or `rtl`
 214         if let pageProgressionDirection = spine.attributes["page-progression-direction"] {
 215             book.spine.pageProgressionDirection = pageProgressionDirection
 216         }
 217     }
 218
 219     /// Reads and parses a .smil file.
 220     ///
 221     /// - Parameter resource: A `FRResource` to read the smill
 222     private func readSmilFile(_ resource: FRResource) {
 223         do {
 224             let smilData = try Data(contentsOf: URL(fileURLWithPath: resource.fullHref), options: .alwaysMapped)
 225             var smilFile = FRSmilFile(resource: resource)
 226             let xmlDoc = try AEXMLDocument(xml: smilData)
 227
 228             let children = xmlDoc.root["body"].children
 229
 230             if children.count > 0 {
 231                 smilFile.data.append(contentsOf: readSmilFileElements(children))
 232             }
 233
 234             book.smils.add(smilFile)
 235         } catch {
 236             print("Cannot read .smil file: "+resource.href)
 237         }
 238     }
 239
 240     private func readSmilFileElements(_ children: [AEXMLElement]) -> [FRSmilElement] {
 241         var data = [FRSmilElement]()
 242
 243         // convert each smil element to a FRSmil object
 244         children.forEach{
 245             let smil = FRSmilElement(name: $0.name, attributes: $0.attributes)
 246
 247             // if this element has children, convert them to objects too
 248             if $0.children.count > 0 {
 249                 smil.children.append(contentsOf: readSmilFileElements($0.children))
 250             }
 251
 252             data.append(smil)
 253         }
 254
 255         return data
 256     }
 257
 258     /// Read and parse the Table of Contents.
 259     ///
 260     /// - Returns: A list of toc references
 261     private func findTableOfContents() -> [FRTocReference] {
 262         var tableOfContent = [FRTocReference]()
 263         var tocItems: [AEXMLElement]?
 264         guard let tocResource = book.tocResource else { return tableOfContent }
 265         let tocPath = resourcesBasePath.appendingPathComponent(tocResource.href)
 266
 267         do {
 268             if tocResource.mediaType == MediaType.ncx {
 269                 let ncxData = try Data(contentsOf: URL(fileURLWithPath: tocPath), options: .alwaysMapped)
 270                 let xmlDoc = try AEXMLDocument(xml: ncxData)
 271                 if let itemsList = xmlDoc.root["navMap"]["navPoint"].all {
 272                     tocItems = itemsList
 273                 }
 274             } else {
 275                 let tocData = try Data(contentsOf: URL(fileURLWithPath: tocPath), options: .alwaysMapped)
 276                 let xmlDoc = try AEXMLDocument(xml: tocData)
 277
 278                 if let nav = xmlDoc.root["body"]["nav"].first, let itemsList = nav["ol"]["li"].all {
 279                     tocItems = itemsList
 280                 } else if let nav = findNavTag(xmlDoc.root["body"]), let itemsList = nav["ol"]["li"].all {
 281                     tocItems = itemsList
 282                 }
 283             }
 284         } catch {
 285             print("Cannot find Table of Contents.")
 286         }
 287
 288         guard let items = tocItems else { return tableOfContent }
 289
 290         for item in items {
 291             guard let ref = readTOCReference(item) else { continue }
 292             tableOfContent.append(ref)
 293         }
 294
 295         return tableOfContent
 296     }
 297
 298     /// Recursively finds a `<nav>` tag on html.
 299     ///
 300     /// - Parameter element: An `AEXMLElement`, usually the `<body>`
 301     /// - Returns: If found the `<nav>` `AEXMLElement`
 302     @discardableResult func findNavTag(_ element: AEXMLElement) -> AEXMLElement? {
 303         for element in element.children {
 304             if let nav = element["nav"].first {
 305                 return nav
 306             } else {
 307                 findNavTag(element)
 308             }
 309         }
 310         return nil
 311     }
 312
 313     fileprivate func readTOCReference(_ navpointElement: AEXMLElement) -> FRTocReference? {
 314         var label = ""
 315
 316         if book.tocResource?.mediaType == MediaType.ncx {
 317             if let labelText = navpointElement["navLabel"]["text"].value {
 318                 label = labelText
 319             }
 320
 321             guard let reference = navpointElement["content"].attributes["src"] else { return nil }
 322             let hrefSplit = reference.split {$0 == "#"}.map { String($0) }
 323             let fragmentID = hrefSplit.count > 1 ? hrefSplit[1] : ""
 324             let href = hrefSplit[0]
 325
 326             let resource = book.resources.findByHref(href)
 327             let toc = FRTocReference(title: label, resource: resource, fragmentID: fragmentID)
 328
 329             // Recursively find child
 330             if let navPoints = navpointElement["navPoint"].all {
 331                 for navPoint in navPoints {
 332                     guard let item = readTOCReference(navPoint) else { continue }
 333                     toc.children.append(item)
 334                 }
 335             }
 336             return toc
 337         } else {
 338             if let labelText = navpointElement["a"].value {
 339                 label = labelText
 340             }
 341
 342             guard let reference = navpointElement["a"].attributes["href"] else { return nil }
 343             let hrefSplit = reference.split {$0 == "#"}.map { String($0) }
 344             let fragmentID = hrefSplit.count > 1 ? hrefSplit[1] : ""
 345             let href = hrefSplit[0]
 346
 347             let resource = book.resources.findByHref(href)
 348             let toc = FRTocReference(title: label, resource: resource, fragmentID: fragmentID)
 349
 350             // Recursively find child
 351             if let navPoints = navpointElement["ol"]["li"].all {
 352                 for navPoint in navPoints {
 353                     guard let item = readTOCReference(navPoint) else { continue }
 354                     toc.children.append(item)
 355                 }
 356             }
 357             return toc
 358         }
 359     }
 360
 361     // MARK: - Recursive add items to a list
 362
 363     var flatTOC: [FRTocReference] {
 364         var tocItems = [FRTocReference]()
 365
 366         for item in book.tableOfContents {
 367             tocItems.append(item)
 368             tocItems.append(contentsOf: countTocChild(item))
 369         }
 370         return tocItems
 371     }
 372
 373     func countTocChild(_ item: FRTocReference) -> [FRTocReference] {
 374         var tocItems = [FRTocReference]()
 375
 376         item.children.forEach {
 377             tocItems.append($0)
 378         }
 379         return tocItems
 380     }
 381
 382     /// Read and parse <metadata>.
 383     ///
 384     /// - Parameter tags: XHTML tags
 385     /// - Returns: Metadata object
 386     fileprivate func readMetadata(_ tags: [AEXMLElement]) -> FRMetadata {
 387         let metadata = FRMetadata()
 388
 389         for tag in tags {
 390             if tag.name == "dc:title" {
 391                 metadata.titles.append(tag.value ?? "")
 392             }
 393
 394             if tag.name == "dc:identifier" {
 395                 let identifier = Identifier(id: tag.attributes["id"], scheme: tag.attributes["opf:scheme"], value: tag.value)
 396                 metadata.identifiers.append(identifier)
 397             }
 398
 399             if tag.name == "dc:language" {
 400                 let language = tag.value ?? metadata.language
 401                 metadata.language = language != "en" ? language : metadata.language
 402             }
 403
 404             if tag.name == "dc:creator" {
 405                 metadata.creators.append(Author(name: tag.value ?? "", role: tag.attributes["opf:role"] ?? "", fileAs: tag.attributes["opf:file-as"] ?? ""))
 406             }
 407
 408             if tag.name == "dc:contributor" {
 409                 metadata.creators.append(Author(name: tag.value ?? "", role: tag.attributes["opf:role"] ?? "", fileAs: tag.attributes["opf:file-as"] ?? ""))
 410             }
 411
 412             if tag.name == "dc:publisher" {
 413                 metadata.publishers.append(tag.value ?? "")
 414             }
 415
 416             if tag.name == "dc:description" {
 417                 metadata.descriptions.append(tag.value ?? "")
 418             }
 419
 420             if tag.name == "dc:subject" {
 421                 metadata.subjects.append(tag.value ?? "")
 422             }
 423
 424             if tag.name == "dc:rights" {
 425                 metadata.rights.append(tag.value ?? "")
 426             }
 427
 428             if tag.name == "dc:date" {
 429                 metadata.dates.append(EventDate(date: tag.value ?? "", event: tag.attributes["opf:event"] ?? ""))
 430             }
 431
 432             if tag.name == "meta" {
 433                 if tag.attributes["name"] != nil {
 434                     metadata.metaAttributes.append(Meta(name: tag.attributes["name"], content: tag.attributes["content"]))
 435                 }
 436
 437                 if tag.attributes["property"] != nil && tag.attributes["id"] != nil {
 438                     metadata.metaAttributes.append(Meta(id: tag.attributes["id"], property: tag.attributes["property"], value: tag.value))
 439                 }
 440
 441                 if tag.attributes["property"] != nil {
 442                     metadata.metaAttributes.append(Meta(property: tag.attributes["property"], value: tag.value, refines: tag.attributes["refines"]))
 443                 }
 444             }
 445         }
 446         return metadata
 447     }
 448
 449     /// Read and parse <spine>.
 450     ///
 451     /// - Parameter tags: XHTML tags
 452     /// - Returns: Spine object
 453     fileprivate func readSpine(_ tags: [AEXMLElement]) -> FRSpine {
 454         let spine = FRSpine()
 455
 456         for tag in tags {
 457             guard let idref = tag.attributes["idref"] else { continue }
 458             var linear = true
 459
 460             if tag.attributes["linear"] != nil {
 461                 linear = tag.attributes["linear"] == "yes" ? true : false
 462             }
 463
 464             if book.resources.containsById(idref) {
 465                 guard let resource = book.resources.findById(idref) else { continue }
 466                 spine.spineReferences.append(Spine(resource: resource, linear: linear))
 467             }
 468         }
 469         return spine
 470     }
 471
 472     /// Skip a file from iCloud backup.
 473     ///
 474     /// - Parameter url: File URL
 475     /// - Throws: Error if not possible
 476     fileprivate func addSkipBackupAttributeToItemAtURL(_ url: URL) throws {
 477         assert(FileManager.default.fileExists(atPath: url.path))
 478
 479         var urlToExclude = url
 480         var resourceValues = URLResourceValues()
 481         resourceValues.isExcludedFromBackup = true
 482         try urlToExclude.setResourceValues(resourceValues)
 483     }
 484
 485     // MARK: - SSZipArchive delegate
 486
 487     func zipArchiveWillUnzipArchive(atPath path: String, zipInfo: unz_global_info) {
 488         guard shouldRemoveEpub else { return }
 489         guard let epubPathToRemove = epubPathToRemove else { return }
 490         try? FileManager.default.removeItem(atPath: epubPathToRemove)
 491     }
 492 }