Added Android code
[wl-app.git] / Android / r2-streamer / r2-parser / src / main / java / org / readium / r2_streamer / parser / OPFParser.java
1 package org.readium.r2_streamer.parser;
2
3 import org.readium.r2_streamer.model.container.Container;
4 import org.readium.r2_streamer.model.publication.EpubPublication;
5 import org.readium.r2_streamer.model.publication.contributor.Contributor;
6 import org.readium.r2_streamer.model.publication.link.Link;
7 import org.readium.r2_streamer.model.publication.metadata.MetaData;
8 import org.readium.r2_streamer.model.publication.metadata.MetadataItem;
9 import org.readium.r2_streamer.model.publication.rendition.RenditionFlow;
10 import org.readium.r2_streamer.model.publication.rendition.RenditionLayout;
11 import org.readium.r2_streamer.model.publication.rendition.RenditionOrientation;
12 import org.readium.r2_streamer.model.publication.rendition.RenditionSpread;
13 import org.readium.r2_streamer.model.publication.subject.Subject;
14
15 import org.w3c.dom.Attr;
16 import org.w3c.dom.Document;
17 import org.w3c.dom.Element;
18 import org.w3c.dom.NamedNodeMap;
19 import org.w3c.dom.NodeList;
20
21 import java.text.ParseException;
22 import java.text.SimpleDateFormat;
23 import java.util.Date;
24 import java.util.HashMap;
25 import java.util.Map;
26
27 /**
28  * Created by gautam chibde on 31/5/17.
29  */
30
31 public class OPFParser {
32
33     private static final String TAG = OPFParser.class.getSimpleName();
34
35     public static EpubPublication parseOpfFile(String rootFile, EpubPublication publication, Container container) throws EpubParserException {
36         String opfData = container.rawData(rootFile);
37         if (opfData == null) {
38             System.out.println(TAG + "File is missing: " + rootFile);
39             throw new EpubParserException("File is missing");
40         }
41
42         Document document = EpubParser.xmlParser(opfData);
43         if (document == null) {
44             throw new EpubParserException("Error while parsing");
45         }
46
47         MetaData metaData = new MetaData();
48
49         //title
50         metaData.title = parseMainTitle(document);
51
52         //identifier
53         metaData.identifier = parseUniqueIdentifier(document);
54
55         //description
56         Element descriptionElement = (Element) ((Element) document.getDocumentElement().getElementsByTagName("metadata").item(0)).getElementsByTagName("dc:description").item(0);
57         if (descriptionElement != null) {
58             metaData.description = descriptionElement.getTextContent();
59         }
60
61         //modified date
62         Element dateElement = (Element) ((Element) document.getDocumentElement().getElementsByTagName("metadata").item(0)).getElementsByTagName("dc:date").item(0);
63         if (dateElement != null) {
64             try {
65                 SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
66                 Date modifiedDate = dateFormat.parse(dateElement.getTextContent());
67                 metaData.modified = modifiedDate;
68             } catch (ParseException e) {
69                 e.printStackTrace();
70             }
71         }
72
73         //subject
74         NodeList subjectNodeList = document.getElementsByTagName("dc:subject");
75         if (subjectNodeList != null) {
76             for (int i = 0; i < subjectNodeList.getLength(); i++) {
77                 Element subjectElement = (Element) subjectNodeList.item(i);
78                 metaData.subjects.add(new Subject(subjectElement.getTextContent()));
79             }
80         }
81
82         //language
83         NodeList languageNodeList = document.getElementsByTagName("dc:language");
84         if (languageNodeList != null) {
85             for (int i = 0; i < languageNodeList.getLength(); i++) {
86                 Element languageElement = (Element) languageNodeList.item(i);
87                 metaData.languages.add(languageElement.getTextContent());
88             }
89         }
90
91         //rights
92         NodeList rightNodeList = document.getElementsByTagName("dc:rights");
93         if (rightNodeList != null) {
94             for (int i = 0; i < rightNodeList.getLength(); i++) {
95                 Element rightElement = (Element) rightNodeList.item(i);
96                 metaData.rights.add(rightElement.getTextContent());
97             }
98         }
99
100         //publisher
101         NodeList publisherNodeList = document.getElementsByTagName("dc:publisher");
102         if (publisherNodeList != null) {
103             for (int i = 0; i < publisherNodeList.getLength(); i++) {
104                 Element publisherElement = (Element) publisherNodeList.item(i);
105                 metaData.publishers.add(new Contributor(publisherElement.getTextContent()));
106             }
107         }
108
109         //creator
110         NodeList authorNodeList = document.getElementsByTagName("dc:creator");
111         if (authorNodeList != null) {
112             for (int i = 0; i < authorNodeList.getLength(); i++) {
113                 Element authorElement = (Element) authorNodeList.item(i);
114                 parseContributor(authorElement, document, metaData);
115             }
116         }
117
118         //contributor
119         NodeList contributorNodeList = document.getElementsByTagName("dc:contributor");
120         if (contributorNodeList != null) {
121             for (int i = 0; i < contributorNodeList.getLength(); i++) {
122                 Element contributorElement = (Element) contributorNodeList.item(i);
123                 parseContributor(contributorElement, document, metaData);
124             }
125         }
126
127         //rendition property
128         NodeList metaNodeList = document.getElementsByTagName("meta");
129         if (metaNodeList != null) {
130             for (int i = 0; i < metaNodeList.getLength(); i++) {
131                 Element metaElement = (Element) metaNodeList.item(i);
132                 if (metaElement.getAttribute("property").equals("rendition:layout")) {
133                     metaData.rendition.layout = RenditionLayout.valueOfEnum(metaElement.getTextContent());
134                 }
135
136                 if (metaElement.getAttribute("property").equals("rendition:flow")) {
137                     metaData.rendition.flow = RenditionFlow.valueOfEnum(metaElement.getTextContent());
138                 }
139
140                 if (metaElement.getAttribute("property").equals("rendition:orientation")) {
141                     metaData.rendition.orientation = RenditionOrientation.valueOfEnum(metaElement.getTextContent());
142                 }
143
144                 if (metaElement.getAttribute("property").equals("rendition:spread")) {
145                     metaData.rendition.spread = RenditionSpread.valueOfEnum(metaElement.getTextContent());
146                 }
147
148                 if (metaElement.getAttribute("property").equals("rendition:viewport")) {
149                     metaData.rendition.viewport = metaElement.getTextContent();
150                 }
151                 if (metaElement.getAttribute("property").equals("media:duration")) {
152                     MetadataItem metadataItem = new MetadataItem();
153                     metadataItem.property = metaElement.getAttribute("refines");
154                     metadataItem.value = metaElement.getTextContent();
155                     metaData.getOtherMetadata().add(metadataItem);
156                 }
157             }
158         }
159
160         Element spineElement = (Element) document.getElementsByTagName("spine").item(0);
161         if (spineElement != null) {
162             metaData.direction = spineElement.getAttribute("page-progression-direction");
163         }
164
165         publication.metadata = metaData;
166
167         //cover
168         String coverId = null;
169         if (metaNodeList != null) {
170             for (int i = 0; i < metaNodeList.getLength(); i++) {
171                 Element metaElement = (Element) metaNodeList.item(i);
172                 if (metaElement.getAttribute("name").equals("cover")) {
173                     coverId = metaElement.getAttribute("content");
174                 }
175             }
176         }
177         parseSpineAndResourcesAndGuide(document, publication, coverId, rootFile, container);
178         return publication;
179     }
180
181     //@Nullable
182     private static String parseMainTitle(Document document) {
183         Element titleElement;
184         NodeList titleNodes = document.getElementsByTagName("dc:title");
185         if (titleNodes != null) {
186             if (titleNodes.getLength() > 1) {
187                 for (int i = 0; i < titleNodes.getLength(); i++) {
188                     titleElement = (Element) titleNodes.item(i);
189                     String titleId = titleElement.getAttribute("id");
190                     NodeList metaNodes = document.getElementsByTagName("meta");
191                     if (metaNodes != null) {
192                         for (int j = 0; j < metaNodes.getLength(); j++) {
193                             Element metaElement = (Element) metaNodes.item(j);
194                             if (metaElement.getAttribute("property").equals("title-type")) {
195                                 if (metaElement.getAttribute("refines").equals("#" + titleId)) {
196                                     if (metaElement.getTextContent().equals("main")) {
197                                         return titleElement.getTextContent();
198                                     }
199                                 }
200                             }
201                         }
202                     }
203                 }
204             } else {
205                 titleElement = (Element) titleNodes.item(0);
206                 return titleElement.getTextContent();
207             }
208         }
209         return null;
210     }
211
212     //@Nullable
213     private static String parseUniqueIdentifier(Document document) {
214         Element identifierElement;
215         NodeList identifierNodes = document.getElementsByTagName("dc:identifier");
216         if (identifierNodes != null) {
217             if (identifierNodes.getLength() > 1) {
218                 for (int i = 0; i < identifierNodes.getLength(); i++) {
219                     identifierElement = (Element) identifierNodes.item(i);
220                     String uniqueId = identifierElement.getAttribute("unique-identifier");
221                     if (identifierElement.getAttribute("id").equals(uniqueId)) {
222                         return identifierElement.getTextContent();
223                     }
224                 }
225             } else {
226                 identifierElement = (Element) identifierNodes.item(0);
227                 return identifierElement.getTextContent();
228             }
229         }
230         return null;
231     }
232
233     private static void parseContributor(Element element, Document document, MetaData metaData) {
234         Contributor contributor = createContributorFromElement(element, document);
235         if (contributor != null) {
236             String role = contributor.getRole();
237             if (role != null) {
238                 switch (role) {
239                     case "aut":
240                         metaData.creators.add(contributor);
241                         break;
242                     case "trl":
243                         metaData.translators.add(contributor);
244                         break;
245                     case "art":
246                         metaData.artists.add(contributor);
247                         break;
248                     case "edt":
249                         metaData.editors.add(contributor);
250                         break;
251                     case "ill":
252                         metaData.illustrators.add(contributor);
253                         break;
254                     case "clr":
255                         metaData.colorists.add(contributor);
256                         break;
257                     case "nrt":
258                         metaData.narrators.add(contributor);
259                         break;
260                     case "pbl":
261                         metaData.publishers.add(contributor);
262                         break;
263                     default:
264                         metaData.contributors.add(contributor);
265                         break;
266                 }
267             } else {
268                 if (element.getTagName().equals("dc:creator")) {
269                     metaData.creators.add(contributor);
270                 } else {
271                     metaData.contributors.add(contributor);
272                 }
273             }
274         }
275     }
276
277     //@Nullable
278     private static Contributor createContributorFromElement(Element element, Document document) {
279         Contributor contributor = new Contributor(element.getTextContent());
280         if (contributor != null) {
281             if (element.hasAttribute("opf:role")) {
282                 String role = element.getAttribute("opf:role");
283                 if (role != null) {
284                     contributor.role = role;
285                 }
286             }
287             if (element.hasAttribute("opf:file-as")) {
288                 String sortAs = element.getAttribute("opf:file-as");
289                 if (sortAs != null) {
290                     contributor.sortAs = sortAs;
291                 }
292             }
293             if (element.hasAttribute("id")) {
294                 String identifier = element.getAttribute("id");
295                 if (identifier != null) {
296                     NodeList metas = document.getElementsByTagName("meta");
297                     if (metas != null) {
298                         for (int i = 0; i < metas.getLength(); i++) {
299                             Element metaElement = (Element) metas.item(i);
300                             if (metaElement.getAttribute("property").equals("role")) {
301                                 if (metaElement.getAttribute("refines").equals("#" + identifier)) {
302                                     contributor.role = metaElement.getTextContent();
303                                 }
304                             }
305                         }
306                     }
307                 }
308             }
309             return contributor;
310         }
311         return null;
312     }
313
314     private static void parseSpineAndResourcesAndGuide(Document document, EpubPublication publication, String coverId, String rootFile, Container container) throws EpubParserException {
315         int startIndex = 0;
316         int endIndex = rootFile.indexOf("/");
317         System.out.println(TAG + " rootFile:= " + rootFile);
318         String packageName = "";
319         if (endIndex != -1) {
320             packageName = rootFile.substring(startIndex, endIndex) + "/";
321         }
322         Map<String, Link> manifestLinks = new HashMap<>();
323
324         NodeList itemNodes = document.getElementsByTagName("item");
325         if (itemNodes != null) {
326             for (int i = 0; i < itemNodes.getLength(); i++) {
327                 Element itemElement = (Element) itemNodes.item(i);
328
329                 Link link = new Link();
330                 NamedNodeMap nodeMap = itemElement.getAttributes();
331                 for (int j = 0; j < nodeMap.getLength(); j++) {
332                     Attr attr = (Attr) nodeMap.item(j);
333                     switch (attr.getNodeName()) {
334                         case "href":
335                             link.href = packageName + attr.getNodeValue();
336                             break;
337                         case "media-type":
338                             link.typeLink = attr.getNodeValue();
339                             if (link.typeLink.equalsIgnoreCase("application/smil+xml")) {
340                                 link.duration = MetadataItem.getSMILDuration(publication.metadata.getOtherMetadata(), link.id);
341                             }
342                             break;
343                         case "properties":
344                             if (attr.getNodeValue().equals("nav")) {
345                                 link.rel.add("contents");
346                             } else if (attr.getNodeValue().equals("cover-image")) {
347                                 link.rel.add("cover");
348                             } else if (!attr.getNodeValue().equals("nav") && !attr.getNodeValue().equals("cover-image")) {
349                                 link.properties.add(attr.getNodeValue());
350                             }
351                             break;
352                         case "media-overlay":
353                             link.properties.add("media-overlay");
354                             link.properties.add("resource:" + attr.getNodeValue());
355                     }
356                 }
357
358                 String id = itemElement.getAttribute("id");
359                 String href = itemElement.getAttribute("href");
360                 if (href != null && href.contains("ncx")) {
361                     NCXParser.parseNCXFile(link.getHref(), container, publication, packageName);
362                 }
363                 link.setId(id);
364
365                 if (id.equals(coverId)) {
366
367                     publication.coverLink = new Link();
368                     publication.coverLink.rel.add("cover");
369                     publication.coverLink.setId(id);
370                     publication.coverLink.setHref(link.getHref());
371                     publication.coverLink.setTypeLink(link.getTypeLink());
372                     publication.coverLink.setProperties(link.getProperties());
373                 }
374                 publication.linkMap.put(link.href, link);
375                 manifestLinks.put(id, link);
376             }
377         }
378
379         NodeList itemRefNodes = document.getElementsByTagName("itemref");
380         if (itemRefNodes != null) {
381             for (int i = 0; i < itemRefNodes.getLength(); i++) {
382                 Element itemRefElement = (Element) itemRefNodes.item(i);
383                 String id = itemRefElement.getAttribute("idref");
384                 if (manifestLinks.containsKey(id)) {
385                     publication.spines.add(manifestLinks.get(id));
386                     manifestLinks.remove(id);
387                 }
388             }
389         }
390         publication.resources.addAll(manifestLinks.values());
391
392         NodeList referenceNodes = document.getElementsByTagName("reference");
393         if (referenceNodes != null) {
394             for (int i = 0; i < referenceNodes.getLength(); i++) {
395                 Element referenceElement = (Element) referenceNodes.item(i);
396                 Link link = new Link();
397                 link.setType(referenceElement.getAttribute("type"));
398                 link.setChapterTitle(referenceElement.getAttribute("title"));
399                 link.setHref(referenceElement.getAttribute("href"));
400                 publication.guides.add(link);
401             }
402         }
403     }
404 }