1 package org.readium.r2_streamer.parser;
3 import org.readium.r2_streamer.model.container.Container;
4 import org.readium.r2_streamer.model.publication.EpubPublication;
6 import org.w3c.dom.Document;
7 import org.w3c.dom.Element;
8 import org.xml.sax.InputSource;
9 import org.xml.sax.SAXException;
11 import java.io.IOException;
12 import java.io.StringReader;
14 import javax.xml.parsers.DocumentBuilder;
15 import javax.xml.parsers.DocumentBuilderFactory;
16 import javax.xml.parsers.ParserConfigurationException;
19 * Created by Shrikant Badwaik on 27-Jan-17.
22 public class EpubParser {
23 private final String TAG = "EpubParser";
25 private Container container; //can be either EpubContainer or DirectoryContainer
26 private EpubPublication publication;
27 //private String epubVersion;
/**
 * Creates a parser bound to the given container, starting from an empty publication.
 *
 * @param container container whose raw entries will be read while parsing
 */
public EpubParser(Container container) {
    this.publication = new EpubPublication();
    this.container = container;
}
34 public EpubPublication parseEpubFile(String filePath) {
37 if (filePath.contains(".cbz")) {
38 CBZParser.parseCBZ(container, publication);
41 if (isMimeTypeValid()) {
42 rootFile = parseContainer();
44 publication.internalData.put("type", "epub");
45 publication.internalData.put("rootfile", rootFile);
47 this.publication = OPFParser.parseOpfFile(rootFile, this.publication, container);
49 this.publication.encryptions = EncryptionParser.parseEncryption(container);
50 // Parse Media Overlay
51 MediaOverlayParser.parseMediaOverlay(this.publication, container);
54 } catch (EpubParserException e) {
55 System.out.println(TAG + " parserEpubFile() error " + e.toString());
60 private boolean isMimeTypeValid() throws EpubParserException {
61 String mimeTypeData = container.rawData("mimetype");
63 if (mimeTypeData.equals("application/epub+zip")) {
66 System.out.println(TAG + "Invalid MIME type: " + mimeTypeData);
67 throw new EpubParserException("Invalid MIME type");
71 private String parseContainer() throws EpubParserException {
72 String containerPath = "META-INF/container.xml";
73 String containerData = container.rawData(containerPath);
75 if (containerData == null) {
76 System.out.println(TAG + " File is missing: " + containerPath);
77 throw new EpubParserException("File is missing");
80 String opfFile = containerXmlParser(containerData);
81 if (opfFile == null) {
82 throw new EpubParserException("Error while parsing");
88 private String containerXmlParser(String containerData) throws EpubParserException { //parsing container.xml
90 String xml = containerData.replaceAll("[^\\x20-\\x7e]", "").trim(); //in case encoding problem
92 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
93 DocumentBuilder builder = factory.newDocumentBuilder();
94 Document document = builder.parse(new InputSource(new StringReader(xml)));
95 document.getDocumentElement().normalize();
96 if (document == null) {
97 throw new EpubParserException("Error while parsing container.xml");
100 Element rootElement = (Element) ((Element) document.getDocumentElement().getElementsByTagName("rootfiles").item(0)).getElementsByTagName("rootfile").item(0);
101 if (rootElement != null) {
102 String opfFile = rootElement.getAttribute("full-path");
103 if (opfFile == null) {
104 throw new EpubParserException("Missing root file element in container.xml");
107 return opfFile; //returns opf file
109 } catch (ParserConfigurationException | SAXException | IOException e) {
117 public static Document xmlParser(String xmlData) throws EpubParserException {
119 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
120 DocumentBuilder builder = factory.newDocumentBuilder();
121 Document document = builder.parse(new InputSource(new StringReader(xmlData)));
122 document.getDocumentElement().normalize();
123 if (document == null) {
124 throw new EpubParserException("Error while parsing xml file");
128 } catch (ParserConfigurationException | SAXException | IOException e) {