1 package org.readium.r2_streamer.parser;
3 import org.readium.r2_streamer.model.container.Container;
4 import org.readium.r2_streamer.model.publication.EpubPublication;
5 import org.readium.r2_streamer.model.publication.contributor.Contributor;
6 import org.readium.r2_streamer.model.publication.link.Link;
7 import org.readium.r2_streamer.model.publication.metadata.MetaData;
8 import org.readium.r2_streamer.model.publication.metadata.MetadataItem;
9 import org.readium.r2_streamer.model.publication.rendition.RenditionFlow;
10 import org.readium.r2_streamer.model.publication.rendition.RenditionLayout;
11 import org.readium.r2_streamer.model.publication.rendition.RenditionOrientation;
12 import org.readium.r2_streamer.model.publication.rendition.RenditionSpread;
13 import org.readium.r2_streamer.model.publication.subject.Subject;
15 import org.w3c.dom.Attr;
16 import org.w3c.dom.Document;
17 import org.w3c.dom.Element;
18 import org.w3c.dom.NamedNodeMap;
19 import org.w3c.dom.NodeList;
21 import java.text.ParseException;
22 import java.text.SimpleDateFormat;
23 import java.util.Date;
24 import java.util.HashMap;
28 * Created by gautam chibde on 31/5/17.
31 public class OPFParser {
// Simple class name, prepended to the console messages printed by this parser.
33 private static final String TAG = OPFParser.class.getSimpleName();
/**
 * Reads the OPF package document stored at {@code rootFile} in {@code container}, collects its
 * metadata into a fresh {@link MetaData}, assigns that to {@code publication.metadata}, and then
 * passes the document on to {@code parseSpineAndResourcesAndGuide} together with the cover id.
 *
 * @param rootFile    path of the OPF file inside the container
 * @param publication publication object that receives the parsed metadata
 * @param container   provider of the raw OPF text
 * @throws EpubParserException when the container yields no data for {@code rootFile} or the XML
 *                             parser yields no document
 */
35 public static EpubPublication parseOpfFile(String rootFile, EpubPublication publication, Container container) throws EpubParserException {
36 String opfData = container.rawData(rootFile);
37 if (opfData == null) {
// NOTE(review): TAG and the message are concatenated without a separator.
38 System.out.println(TAG + "File is missing: " + rootFile);
39 throw new EpubParserException("File is missing");
42 Document document = EpubParser.xmlParser(opfData);
43 if (document == null) {
44 throw new EpubParserException("Error while parsing");
47 MetaData metaData = new MetaData();
// Title and identifier are resolved by dedicated helpers.
50 metaData.title = parseMainTitle(document);
53 metaData.identifier = parseUniqueIdentifier(document);
// Description: first dc:description inside the first metadata element.
// NOTE(review): NodeList.item(0) is null when no "metadata" element exists, so the chained
// getElementsByTagName call would throw NullPointerException - confirm inputs always carry one.
56 Element descriptionElement = (Element) ((Element) document.getDocumentElement().getElementsByTagName("metadata").item(0)).getElementsByTagName("dc:description").item(0);
57 if (descriptionElement != null) {
58 metaData.description = descriptionElement.getTextContent();
// Date: first dc:date inside the first metadata element (same null caveat as above).
62 Element dateElement = (Element) ((Element) document.getDocumentElement().getElementsByTagName("metadata").item(0)).getElementsByTagName("dc:date").item(0);
63 if (dateElement != null) {
// The text is parsed with the pattern yyyy-MM-dd and stored as the modification date.
65 SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
66 Date modifiedDate = dateFormat.parse(dateElement.getTextContent());
67 metaData.modified = modifiedDate;
68 } catch (ParseException e) {
// Every dc:subject in the document becomes a Subject.
74 NodeList subjectNodeList = document.getElementsByTagName("dc:subject");
75 if (subjectNodeList != null) {
76 for (int i = 0; i < subjectNodeList.getLength(); i++) {
77 Element subjectElement = (Element) subjectNodeList.item(i);
78 metaData.subjects.add(new Subject(subjectElement.getTextContent()));
// Every dc:language text is added to the language list.
83 NodeList languageNodeList = document.getElementsByTagName("dc:language");
84 if (languageNodeList != null) {
85 for (int i = 0; i < languageNodeList.getLength(); i++) {
86 Element languageElement = (Element) languageNodeList.item(i);
87 metaData.languages.add(languageElement.getTextContent());
// Every dc:rights text is added to the rights list.
92 NodeList rightNodeList = document.getElementsByTagName("dc:rights");
93 if (rightNodeList != null) {
94 for (int i = 0; i < rightNodeList.getLength(); i++) {
95 Element rightElement = (Element) rightNodeList.item(i);
96 metaData.rights.add(rightElement.getTextContent());
// Every dc:publisher becomes a Contributor in the publishers list.
101 NodeList publisherNodeList = document.getElementsByTagName("dc:publisher");
102 if (publisherNodeList != null) {
103 for (int i = 0; i < publisherNodeList.getLength(); i++) {
104 Element publisherElement = (Element) publisherNodeList.item(i);
105 metaData.publishers.add(new Contributor(publisherElement.getTextContent()));
// dc:creator and dc:contributor elements are both routed through parseContributor.
110 NodeList authorNodeList = document.getElementsByTagName("dc:creator");
111 if (authorNodeList != null) {
112 for (int i = 0; i < authorNodeList.getLength(); i++) {
113 Element authorElement = (Element) authorNodeList.item(i);
114 parseContributor(authorElement, document, metaData);
119 NodeList contributorNodeList = document.getElementsByTagName("dc:contributor");
120 if (contributorNodeList != null) {
121 for (int i = 0; i < contributorNodeList.getLength(); i++) {
122 Element contributorElement = (Element) contributorNodeList.item(i);
123 parseContributor(contributorElement, document, metaData);
// meta elements: the "property" attribute selects which rendition field (or other metadata
// item) receives the element's text content.
128 NodeList metaNodeList = document.getElementsByTagName("meta");
129 if (metaNodeList != null) {
130 for (int i = 0; i < metaNodeList.getLength(); i++) {
131 Element metaElement = (Element) metaNodeList.item(i);
132 if (metaElement.getAttribute("property").equals("rendition:layout")) {
133 metaData.rendition.layout = RenditionLayout.valueOfEnum(metaElement.getTextContent());
136 if (metaElement.getAttribute("property").equals("rendition:flow")) {
137 metaData.rendition.flow = RenditionFlow.valueOfEnum(metaElement.getTextContent());
140 if (metaElement.getAttribute("property").equals("rendition:orientation")) {
141 metaData.rendition.orientation = RenditionOrientation.valueOfEnum(metaElement.getTextContent());
144 if (metaElement.getAttribute("property").equals("rendition:spread")) {
145 metaData.rendition.spread = RenditionSpread.valueOfEnum(metaElement.getTextContent());
148 if (metaElement.getAttribute("property").equals("rendition:viewport")) {
149 metaData.rendition.viewport = metaElement.getTextContent();
// media:duration entries are kept as generic items: the "refines" attribute value is stored
// as the item's property and the text content as its value.
151 if (metaElement.getAttribute("property").equals("media:duration")) {
152 MetadataItem metadataItem = new MetadataItem();
153 metadataItem.property = metaElement.getAttribute("refines");
154 metadataItem.value = metaElement.getTextContent();
155 metaData.getOtherMetadata().add(metadataItem);
// Reading direction comes from the first spine element's page-progression-direction attribute.
160 Element spineElement = (Element) document.getElementsByTagName("spine").item(0);
161 if (spineElement != null) {
162 metaData.direction = spineElement.getAttribute("page-progression-direction");
165 publication.metadata = metaData;
// Cover: a meta element with name="cover" supplies, in its "content" attribute, the id that is
// later compared against manifest item ids.
168 String coverId = null;
169 if (metaNodeList != null) {
170 for (int i = 0; i < metaNodeList.getLength(); i++) {
171 Element metaElement = (Element) metaNodeList.item(i);
172 if (metaElement.getAttribute("name").equals("cover")) {
173 coverId = metaElement.getAttribute("content");
// Manifest, spine and guide are handled after publication.metadata has been assigned.
177 parseSpineAndResourcesAndGuide(document, publication, coverId, rootFile, container);
182 private static String parseMainTitle(Document document) {
183 Element titleElement;
184 NodeList titleNodes = document.getElementsByTagName("dc:title");
185 if (titleNodes != null) {
186 if (titleNodes.getLength() > 1) {
187 for (int i = 0; i < titleNodes.getLength(); i++) {
188 titleElement = (Element) titleNodes.item(i);
189 String titleId = titleElement.getAttribute("id");
190 NodeList metaNodes = document.getElementsByTagName("meta");
191 if (metaNodes != null) {
192 for (int j = 0; j < metaNodes.getLength(); j++) {
193 Element metaElement = (Element) metaNodes.item(j);
194 if (metaElement.getAttribute("property").equals("title-type")) {
195 if (metaElement.getAttribute("refines").equals("#" + titleId)) {
196 if (metaElement.getTextContent().equals("main")) {
197 return titleElement.getTextContent();
205 titleElement = (Element) titleNodes.item(0);
206 return titleElement.getTextContent();
213 private static String parseUniqueIdentifier(Document document) {
214 Element identifierElement;
215 NodeList identifierNodes = document.getElementsByTagName("dc:identifier");
216 if (identifierNodes != null) {
217 if (identifierNodes.getLength() > 1) {
218 for (int i = 0; i < identifierNodes.getLength(); i++) {
219 identifierElement = (Element) identifierNodes.item(i);
220 String uniqueId = identifierElement.getAttribute("unique-identifier");
221 if (identifierElement.getAttribute("id").equals(uniqueId)) {
222 return identifierElement.getTextContent();
226 identifierElement = (Element) identifierNodes.item(0);
227 return identifierElement.getTextContent();
/**
 * Builds a Contributor from {@code element} via createContributorFromElement and adds it to
 * one of the contributor lists on {@code metaData}.
 *
 * @param element  the dc:creator or dc:contributor element being processed
 * @param document the OPF document, forwarded to createContributorFromElement
 * @param metaData receiver of the contributor
 */
233 private static void parseContributor(Element element, Document document, MetaData metaData) {
234 Contributor contributor = createContributorFromElement(element, document);
235 if (contributor != null) {
// NOTE(review): the role presumably selects which of the lists below receives the
// contributor; the selecting conditions are not visible in this view - TODO confirm.
236 String role = contributor.getRole();
240 metaData.creators.add(contributor);
243 metaData.translators.add(contributor);
246 metaData.artists.add(contributor);
249 metaData.editors.add(contributor);
252 metaData.illustrators.add(contributor);
255 metaData.colorists.add(contributor);
258 metaData.narrators.add(contributor);
261 metaData.publishers.add(contributor);
264 metaData.contributors.add(contributor);
// Tag-name based routing: dc:creator elements are added to creators.
// NOTE(review): presumably this is the path taken when no role is available, with the
// contributors list as the alternative - TODO confirm.
268 if (element.getTagName().equals("dc:creator")) {
269 metaData.creators.add(contributor);
271 metaData.contributors.add(contributor);
/**
 * Creates a Contributor whose constructor argument is the element's text content and fills in
 * its role and sort key from the element's attributes and from refining meta elements.
 *
 * @param element  element describing the contributor
 * @param document the OPF document, searched for meta elements that refine {@code element}
 */
278 private static Contributor createContributorFromElement(Element element, Document document) {
279 Contributor contributor = new Contributor(element.getTextContent());
// NOTE(review): the object was just constructed, so this check cannot fail.
280 if (contributor != null) {
// Attribute form: opf:role on the element itself.
281 if (element.hasAttribute("opf:role")) {
282 String role = element.getAttribute("opf:role");
284 contributor.role = role;
// Attribute form: opf:file-as supplies the sort key.
287 if (element.hasAttribute("opf:file-as")) {
288 String sortAs = element.getAttribute("opf:file-as");
// NOTE(review): DOM getAttribute returns an empty string, not null, for a missing attribute,
// so this check looks redundant - confirm.
289 if (sortAs != null) {
290 contributor.sortAs = sortAs;
// Refinement form: a meta element with property="role" whose "refines" attribute is "#" plus
// this element's id sets the role, replacing any value assigned above.
293 if (element.hasAttribute("id")) {
294 String identifier = element.getAttribute("id");
295 if (identifier != null) {
296 NodeList metas = document.getElementsByTagName("meta");
298 for (int i = 0; i < metas.getLength(); i++) {
299 Element metaElement = (Element) metas.item(i);
300 if (metaElement.getAttribute("property").equals("role")) {
301 if (metaElement.getAttribute("refines").equals("#" + identifier)) {
302 contributor.role = metaElement.getTextContent();
/**
 * Turns the manifest item elements into Link objects, then distributes them: every link is
 * registered in {@code publication.linkMap}; links referenced by an itemref go to
 * {@code publication.spines}; the remaining ones go to {@code publication.resources}; and each
 * reference element becomes an entry in {@code publication.guides}.
 *
 * @param document    parsed OPF package document
 * @param publication publication that receives links, spine, resources, cover and guides
 * @param coverId     id compared against each manifest item's id to detect the cover (may be null)
 * @param rootFile    path of the OPF file; its leading directory becomes the href prefix
 * @param container   container forwarded to the NCX parser
 * @throws EpubParserException declared by the signature; the throwing call is not evident here
 */
314 private static void parseSpineAndResourcesAndGuide(Document document, EpubPublication publication, String coverId, String rootFile, Container container) throws EpubParserException {
// packageName is the part of rootFile up to its first '/', plus a trailing '/'; it stays empty
// when rootFile contains no '/'.
// NOTE(review): startIndex is not declared in the visible lines; presumably 0 - TODO confirm.
316 int endIndex = rootFile.indexOf("/");
317 System.out.println(TAG + " rootFile:= " + rootFile);
318 String packageName = "";
319 if (endIndex != -1) {
320 packageName = rootFile.substring(startIndex, endIndex) + "/";
// Manifest links keyed by item id, used below to resolve itemref elements.
322 Map<String, Link> manifestLinks = new HashMap<>();
324 NodeList itemNodes = document.getElementsByTagName("item");
325 if (itemNodes != null) {
326 for (int i = 0; i < itemNodes.getLength(); i++) {
327 Element itemElement = (Element) itemNodes.item(i);
// One Link per item; its fields are filled by walking the item's attributes.
329 Link link = new Link();
330 NamedNodeMap nodeMap = itemElement.getAttributes();
331 for (int j = 0; j < nodeMap.getLength(); j++) {
332 Attr attr = (Attr) nodeMap.item(j);
333 switch (attr.getNodeName()) {
// The href is prefixed with packageName.
335 link.href = packageName + attr.getNodeValue();
338 link.typeLink = attr.getNodeValue();
// SMIL items get a duration looked up in the metadata items collected earlier.
// NOTE(review): link.id is read here, but no assignment to link.id is visible in this loop -
// confirm it is populated before this lookup.
339 if (link.typeLink.equalsIgnoreCase("application/smil+xml")) {
340 link.duration = MetadataItem.getSMILDuration(publication.metadata.getOtherMetadata(), link.id);
// "nav" maps to rel "contents", "cover-image" to rel "cover"; anything else is kept as a property.
// NOTE(review): the whole attribute value is compared as one string, so a value holding several
// space-separated entries would fall into the last branch unchanged - confirm intended.
344 if (attr.getNodeValue().equals("nav")) {
345 link.rel.add("contents");
346 } else if (attr.getNodeValue().equals("cover-image")) {
347 link.rel.add("cover");
// This condition is always true when reached: both values were excluded by the branches above.
348 } else if (!attr.getNodeValue().equals("nav") && !attr.getNodeValue().equals("cover-image")) {
349 link.properties.add(attr.getNodeValue());
// A media overlay is recorded as two properties: a marker and "resource:" plus the attribute value.
352 case "media-overlay":
353 link.properties.add("media-overlay");
354 link.properties.add("resource:" + attr.getNodeValue());
358 String id = itemElement.getAttribute("id");
359 String href = itemElement.getAttribute("href");
// NOTE(review): matches any href that contains "ncx" anywhere in it, not only an .ncx suffix.
360 if (href != null && href.contains("ncx")) {
361 NCXParser.parseNCXFile(link.getHref(), container, publication, packageName);
// The item whose id equals coverId is copied into a separate cover link.
365 if (id.equals(coverId)) {
367 publication.coverLink = new Link();
368 publication.coverLink.rel.add("cover");
369 publication.coverLink.setId(id);
370 publication.coverLink.setHref(link.getHref());
371 publication.coverLink.setTypeLink(link.getTypeLink());
372 publication.coverLink.setProperties(link.getProperties());
374 publication.linkMap.put(link.href, link);
375 manifestLinks.put(id, link);
// Spine: each itemref whose idref names a manifest link moves that link into the spine.
379 NodeList itemRefNodes = document.getElementsByTagName("itemref");
380 if (itemRefNodes != null) {
381 for (int i = 0; i < itemRefNodes.getLength(); i++) {
382 Element itemRefElement = (Element) itemRefNodes.item(i);
383 String id = itemRefElement.getAttribute("idref");
384 if (manifestLinks.containsKey(id)) {
385 publication.spines.add(manifestLinks.get(id));
// Removed so that only non-spine links remain for the resources list below.
386 manifestLinks.remove(id);
390 publication.resources.addAll(manifestLinks.values());
// Guide: every reference element becomes a Link carrying its type, title and href attributes.
392 NodeList referenceNodes = document.getElementsByTagName("reference");
393 if (referenceNodes != null) {
394 for (int i = 0; i < referenceNodes.getLength(); i++) {
395 Element referenceElement = (Element) referenceNodes.item(i);
396 Link link = new Link();
397 link.setType(referenceElement.getAttribute("type"));
398 link.setChapterTitle(referenceElement.getAttribute("title"));
399 link.setHref(referenceElement.getAttribute("href"));
400 publication.guides.add(link);