1 package com.folioreader.util;
3 import com.folioreader.model.media_overlay.OverlayItems;
5 import org.readium.r2_streamer.parser.EpubParser;
6 import org.readium.r2_streamer.parser.EpubParserException;
7 import org.w3c.dom.Document;
8 import org.w3c.dom.Element;
9 import org.w3c.dom.Node;
10 import org.w3c.dom.NodeList;
12 import java.util.ArrayList;
13 import java.util.List;
16 * @author gautam chibde on 20/6/17.
19 public final class SMILParser {
22 * Creates a list of {@link OverlayItems} from the tagged elements of the
23 * raw input HTML string.
25 * @param html raw html string
26 * @return list of {@link OverlayItems}
28 public static List<OverlayItems> parseSMIL(String html) {
// Accumulator that parseNodes fills in for each matched section.
29 List<OverlayItems> mediaItems = new ArrayList<>();
// Turn the raw markup into a DOM document using the project's EpubParser helper.
31 Document document = EpubParser.xmlParser(html);
// getElementsByTagName returns every descendant <section> of the root, in document order.
// NOTE(review): if parseNodes also descends into nested elements, items under a nested
// <section> could be collected more than once - confirm against parseNodes.
32 NodeList sections = document.getDocumentElement().getElementsByTagName("section");
33 for (int i = 0; i < sections.getLength(); i++) {
// Each NodeList entry is cast to Element and scanned for overlay items.
34 parseNodes(mediaItems, (Element) sections.item(i));
// On EpubParserException a new empty list is returned instead of mediaItems,
// so anything gathered before the failure is discarded.
36 } catch (EpubParserException e) {
37 return new ArrayList<>();
44 * Function recursively finds and parses the child elements of the input
47 * @param names input list of {@link OverlayItems} where the data is to be stored
48 * @param section input DOM element
50 private static void parseNodes(List<OverlayItems> names, Element section) {
// Walk the direct children of section by following the sibling chain.
51 for (Node n = section.getFirstChild(); n != null; n = n.getNextSibling()) {
// Only element nodes are considered; text, comment and other node types are skipped,
// which also makes the cast to Element below safe.
52 if (n.getNodeType() == Node.ELEMENT_NODE) {
53 Element e = (Element) n;
// An element contributes an item only when it carries an id attribute.
54 if (e.hasAttribute("id")) {
// The item is constructed from the id attribute value and the element's tag name.
55 names.add(new OverlayItems(e.getAttribute("id"), e.getTagName()));
64 * Finds all the text content inside the input HTML page, splits it into sentences
65 * on the separator '.', and returns them as a list of {@link OverlayItems}.
67 * @param html input raw html
68 * @return generated {@link OverlayItems}
70 public static List<OverlayItems> parseSMILForTTS(String html) {
// Accumulator that parseNodesTTS fills in for each matched body element.
71 List<OverlayItems> mediaItems = new ArrayList<>();
// Turn the raw markup into a DOM document using the project's EpubParser helper.
73 Document document = EpubParser.xmlParser(html);
// Despite the variable name, this list holds the <body> elements found under the root.
74 NodeList sections = document.getDocumentElement().getElementsByTagName("body");
75 for (int i = 0; i < sections.getLength(); i++) {
// Each NodeList entry is cast to Element and handed to the text-collecting helper.
76 parseNodesTTS(mediaItems, (Element) sections.item(i));
// On EpubParserException a new empty list is returned instead of mediaItems,
// so anything gathered before the failure is discarded.
78 } catch (EpubParserException e) {
79 return new ArrayList<>();
86 * Recursively looks for child elements that have text content and
87 * adds them to the input {@link OverlayItems} list.
89 * @param names input list of {@link OverlayItems} where the data is to be stored
90 * @param section input DOM element
92 private static void parseNodesTTS(List<OverlayItems> names, Element section) {
93 for (Node n = section.getFirstChild(); n != null; n = n.getNextSibling()) {
94 if (n.getNodeType() == Node.ELEMENT_NODE) {
95 Element e = (Element) n;
96 for (Node n1 = e.getFirstChild(); n1 != null; n1 = n1.getNextSibling()) {
97 if (n1.getTextContent() != null) {
98 for (String s : n1.getTextContent().split("\\.")) {
100 OverlayItems i = new OverlayItems();
107 parseNodesTTS(names, e);