redakcja/static/js/lib/codemirror-0.8/tokenize.js

   1 // A framework for simple tokenizers. Takes care of newlines and
   2 // white-space, and of getting the text from the source stream into
   3 // the token object. A state is a function of two arguments -- a
   4 // string stream and a setState function. The second can be used to
   5 // change the tokenizer's state, and can be ignored for stateless
   6 // tokenizers. This function should advance the stream over a token
   7 // and return a string or object containing information about the next
   8 // token, or null to pass and have the (new) state be called to finish
   9 // the token. When a string is given, it is wrapped in a {style, type}
  10 // object. In the resulting object, the characters consumed are stored
  11 // under the content property. Any whitespace following them is also
  12 // automatically consumed, and added to the value property. (Thus,
  13 // content is the actual meaningful part of the token, while value
  14 // contains all the text it spans.)
  15
  // Creates a tokenizer over a string stream.
  //
  // source -- the stream to read; this code calls its more(), equals(),
  //           next(), applies(), nextWhile() and get() methods.
  // state  -- the initial state function, invoked as
  //           state(source, setState). It returns a style string, a token
  //           object, or a falsy value to have the (possibly replaced)
  //           state called again.
  //
  // Returns an object with three members: state (the current state
  // function), take(type) and next(). next() throws StopIteration (a
  // name this function expects to find in scope; it is not defined here)
  // when the source reports no more input.
  function tokenizer(source, state) {
    // Built once instead of on every call. The explicit \u00a0 is there
    // because IE's regexp matcher is of the opinion that non-breaking
    // spaces are no whitespace.
    var whiteSpacePattern = /^[\s\u00a0]*$/;
    var trailingNewline = /\n$/;

    // Newlines are always a separate token, so they never count as
    // whitespace here.
    function isWhiteSpace(ch) {
      return ch !== "\n" && whiteSpacePattern.test(ch);
    }

    // take() fills in the object it is given. A state function may hand
    // back the same descriptor object for every token; copying it first
    // keeps one token's content from leaking into the next and gives each
    // token its own identity. Strings (wrapped by take) and any other
    // non-object values are passed through untouched.
    function freshToken(type) {
      if (typeof type !== "object")
        return type;
      var copy = {};
      for (var prop in type)
        copy[prop] = type[prop];
      return copy;
    }

    // Named differently from the enclosing function to avoid shadowing it.
    var instance = {
      state: state,

      // Finishes a token. A string is wrapped in a {style, type} object;
      // an object is filled in place and returned. content receives any
      // content already present on the object plus the text consumed from
      // the source so far; value is content plus the whitespace that
      // follows it.
      take: function(type) {
        if (typeof type === "string")
          type = {style: type, type: type};

        type.content = (type.content || "") + source.get();
        // Do not skip whitespace past a newline: the text after it
        // belongs to later tokens.
        if (!trailingNewline.test(type.content))
          source.nextWhile(isWhiteSpace);
        type.value = type.content + source.get();
        return type;
      },

      // Produces the next token, or throws StopIteration at end of input.
      next: function () {
        if (!source.more()) throw StopIteration;

        if (source.equals("\n")) {
          source.next();
          return this.take("whitespace");
        }

        var type;
        if (source.applies(isWhiteSpace)) {
          // Nothing is consumed here; take() skips the whitespace run, so
          // such a token has an empty content and the run as its value.
          type = "whitespace";
        }
        else {
          // A falsy result means "pass": call the current state again. The
          // state is looked up on every round because setState may have
          // replaced it.
          while (!type)
            type = this.state(source, function(s) {instance.state = s;});
        }

        return this.take(freshToken(type));
      }
    };
    return instance;
  }