Android/folioreader/src/main/java/com/folioreader/util/SMILParser.java

   1 package com.folioreader.util;
   2
   3 import com.folioreader.model.media_overlay.OverlayItems;
   4
   5 import org.readium.r2_streamer.parser.EpubParser;
   6 import org.readium.r2_streamer.parser.EpubParserException;
   7 import org.w3c.dom.Document;
   8 import org.w3c.dom.Element;
   9 import org.w3c.dom.Node;
  10 import org.w3c.dom.NodeList;
  11
  12 import java.util.ArrayList;
  13 import java.util.List;
  14
  15 /**
  16  * @author gautam chibde on 20/6/17.
  17  */
  18
  19 public final class SMILParser {
  20
  21     /**
  22      * Function creates list {@link OverlayItems} of all tag elements from the
  23      * input html raw string.
  24      *
  25      * @param html raw html string
  26      * @return list of {@link OverlayItems}
  27      */
  28     public static List<OverlayItems> parseSMIL(String html) {
  29         List<OverlayItems> mediaItems = new ArrayList<>();
  30         try {
  31             Document document = EpubParser.xmlParser(html);
  32             NodeList sections = document.getDocumentElement().getElementsByTagName("section");
  33             for (int i = 0; i < sections.getLength(); i++) {
  34                 parseNodes(mediaItems, (Element) sections.item(i));
  35             }
  36         } catch (EpubParserException e) {
  37             return new ArrayList<>();
  38         }
  39         return mediaItems;
  40     }
  41
  42     /**
  43      * [RECURSIVE]
  44      * Function recursively finds and parses the child elements of the input
  45      * DOM element.
  46      *
  47      * @param names   input {@link OverlayItems} where data is to be stored
  48      * @param section input DOM element
  49      */
  50     private static void parseNodes(List<OverlayItems> names, Element section) {
  51         for (Node n = section.getFirstChild(); n != null; n = n.getNextSibling()) {
  52             if (n.getNodeType() == Node.ELEMENT_NODE) {
  53                 Element e = (Element) n;
  54                 if (e.hasAttribute("id")) {
  55                     names.add(new OverlayItems(e.getAttribute("id"), e.getTagName()));
  56                 } else {
  57                     parseNodes(names, e);
  58                 }
  59             }
  60         }
  61     }
  62
  63     /**
  64      * function finds all the text content inside input html page and splits each sentence
  65      * with separator '.' and returns them as a list of {@link OverlayItems}
  66      *
  67      * @param html input raw html
  68      * @return generated {@link OverlayItems}
  69      */
  70     public static List<OverlayItems> parseSMILForTTS(String html) {
  71         List<OverlayItems> mediaItems = new ArrayList<>();
  72         try {
  73             Document document = EpubParser.xmlParser(html);
  74             NodeList sections = document.getDocumentElement().getElementsByTagName("body");
  75             for (int i = 0; i < sections.getLength(); i++) {
  76                 parseNodesTTS(mediaItems, (Element) sections.item(i));
  77             }
  78         } catch (EpubParserException e) {
  79             return new ArrayList<>();
  80         }
  81         return mediaItems;
  82     }
  83
  84     /**
  85      * [RECURSIVE]
  86      * Function recursively looks for the child element with the text content and
  87      * adds them to the input {@link OverlayItems} list
  88      *
  89      * @param names   input {@link OverlayItems} where data is to be stored
  90      * @param section input DOM element
  91      */
  92     private static void parseNodesTTS(List<OverlayItems> names, Element section) {
  93         for (Node n = section.getFirstChild(); n != null; n = n.getNextSibling()) {
  94             if (n.getNodeType() == Node.ELEMENT_NODE) {
  95                 Element e = (Element) n;
  96                 for (Node n1 = e.getFirstChild(); n1 != null; n1 = n1.getNextSibling()) {
  97                     if (n1.getTextContent() != null) {
  98                         for (String s : n1.getTextContent().split("\\.")) {
  99                             if (!s.isEmpty()) {
 100                                 OverlayItems i = new OverlayItems();
 101                                 i.setText(s);
 102                                 names.add(i);
 103                             }
 104                         }
 105                     }
 106                 }
 107                 parseNodesTTS(names, e);
 108             }
 109         }
 110     }
 111 }