X-Git-Url: https://git.mdrn.pl/pylucene.git/blobdiff_plain/a2e61f0c04805cfcb8706176758d1283c7e3a55c..aaeed5504b982cf3545252ab528713250aa33eed:/lucene-java-3.4.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/demohtml/TestHtmlParser.java diff --git a/lucene-java-3.4.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/demohtml/TestHtmlParser.java b/lucene-java-3.4.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/demohtml/TestHtmlParser.java deleted file mode 100644 index 1301287..0000000 --- a/lucene-java-3.4.0/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/demohtml/TestHtmlParser.java +++ /dev/null @@ -1,132 +0,0 @@ -package org.apache.lucene.benchmark.byTask.feeds.demohtml; - -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.io.Reader; -import java.io.StringReader; -import java.util.Properties; - -import org.apache.lucene.util.LuceneTestCase; - -public class TestHtmlParser extends LuceneTestCase { - - public void testUnicode() throws Exception { - String text = "汉语"; - HTMLParser parser = new HTMLParser(new StringReader(text)); - assertReadsTo("汉语", parser); - } - - public void testEntities() throws Exception { - String text = "汉语¥"; - HTMLParser parser = new HTMLParser(new StringReader(text)); - assertReadsTo("汉语¥", parser); - } - - public void testComments() throws Exception { - String text = "foo"; - HTMLParser parser = new HTMLParser(new StringReader(text)); - assertReadsTo("foo", parser); - } - - public void testScript() throws Exception { - String text = "foo"; - HTMLParser parser = new HTMLParser(new StringReader(text)); - assertReadsTo("foo", parser); - } - - public void testStyle() throws Exception { - String text = "" + - "foo"; - HTMLParser parser = new HTMLParser(new StringReader(text)); - assertReadsTo("foo", parser); - } - - public void testDoctype() throws Exception { - String text = "" + - "foo"; - HTMLParser parser = new HTMLParser(new StringReader(text)); - assertReadsTo("foo", parser); - } - - public void testMeta() throws Exception { - String text = "" + - "" + - "" + - "" + - "" + - "foobar"; - HTMLParser parser = new HTMLParser(new StringReader(text)); - Properties tags = parser.getMetaTags(); - assertEquals(4, tags.size()); - assertEquals("1", tags.get("a")); - assertEquals("2", tags.get("b")); - assertEquals("this is a test", tags.get("keywords")); - assertEquals("text/html;charset=utf-8", tags.get("content-type")); - } - - public void testTitle() throws Exception { - String text = "foobar"; - HTMLParser parser = new HTMLParser(new StringReader(text)); - assertEquals("foo", parser.getTitle()); - } - - public void testSummary() throws Exception { - String text = "foo" + - "Summarize me. Summarize me. Summarize me. Summarize me. " + - "Summarize me. Summarize me. Summarize me. Summarize me. " + - "Summarize me. Summarize me. Summarize me. Summarize me. " + - "Summarize me. Summarize me. Summarize me. Summarize me. " + - "Summarize me. Summarize me. Summarize me. Summarize me. " + - "Summarize me. Summarize me. Summarize me. Summarize me. " + - "Summarize me. Summarize me. Summarize me. Summarize me. " + - ""; - HTMLParser parser = new HTMLParser(new StringReader(text)); - assertEquals(200, parser.getSummary().length()); - } - - // LUCENE-590 - public void testSummaryTitle() throws Exception { - String text = "SummarySummary of the document"; - HTMLParser parser = new HTMLParser(new StringReader(text)); - assertEquals("Summary of the document", parser.getSummary()); - } - - // LUCENE-2246 - public void testTurkish() throws Exception { - String text = "" + - "\"ş\"" + - ""; - HTMLParser parser = new HTMLParser(new StringReader(text)); - assertReadsTo("[ş]", parser); - } - - private void assertReadsTo(String expected, HTMLParser parser) throws IOException { - Reader reader = parser.getReader(); - StringBuilder builder = new StringBuilder(); - int ch = 0; - while ((ch = reader.read()) != -1) { - builder.append((char)ch); - } - assertEquals(expected, builder.toString()); - } -}