fix in librarian

[wolnelektury.git] / apps / search / custom.py
diff --git a/apps/search/custom.py b/apps/search/custom.py

index 788b6c4..b3b704d 100644 (file)
--- a/apps/search/custom.py
+++ b/apps/search/custom.py
@@ -1,10 +1,15 @@
-
+# -*- coding: utf-8 -*-
+# This file is part of Wolnelektury, licensed under GNU Affero GPLv3 or later.
+# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
+#
  from sunburnt import sunburnt
  from lxml import etree
  import urllib
  import warnings
  from sunburnt import search
  import copy
  from sunburnt import sunburnt
  from lxml import etree
  import urllib
  import warnings
  from sunburnt import search
  import copy
+from httplib2 import socket
+import re
  
  
  class TermVectorOptions(search.Options):
  
  
  class TermVectorOptions(search.Options):
@@ -89,7 +94,10 @@ class CustomSolrInterface(sunburnt.SolrInterface):
              self.writeable = False
          elif 'r' not in mode:
              self.readable = False
              self.writeable = False
          elif 'r' not in mode:
              self.readable = False
-        self.init_schema()
+        try:
+            self.init_schema()
+        except socket.error, e:
+            raise socket.error, "Cannot connect to Solr server, and search indexing is enabled (%s)" % str(e)
  
      def _analyze(self, **kwargs):
          if not self.readable:
  
      def _analyze(self, **kwargs):
          if not self.readable:
@@ -130,6 +138,25 @@ class CustomSolrInterface(sunburnt.SolrInterface):
          terms = map(lambda n: unicode(n.text), terms)
          return terms
  
          terms = map(lambda n: unicode(n.text), terms)
          return terms
  
+    def expand_margins(self, text, start, end):
+        """Widen the (start, end) index pair outwards within `text`.
+
+        `start` is decreased until the character just before it matches
+        the regex ``\W`` (compiled with re.UNICODE) or until it reaches 0.
+        `end` is increased until the character at ``end + 1`` matches
+        ``\W`` or until ``end`` reaches ``len(text) - 1``.
+
+        Returns the adjusted indices as a ``(start, end)`` tuple.
+
+        NOTE(review): substring() later slices ``text[start:end]`` (end
+        exclusive), while the second loop inspects ``text[end + 1]`` rather
+        than ``text[end]``; the returned end may therefore stop one
+        character short of the end of a word - confirm this is intended.
+        """
+        totlen = len(text)
+
+        def is_boundary(x):
+            # True when x starts with a non-word character (``\W``).
+            # The pattern is compiled on every call.
+            ws = re.compile(r"\W", re.UNICODE)
+            return bool(ws.match(x))
+
+        # Walk left while the preceding character is not a boundary.
+        while start > 0:
+            if is_boundary(text[start - 1]):
+                break
+            start -= 1
+
+        # Walk right while the character after `end` is not a boundary;
+        # the loop condition keeps ``end + 1`` inside the string.
+        while end < totlen - 1:
+            if is_boundary(text[end + 1]):
+                break
+            end += 1
+
+        return (start, end)
+
      def substring(self, text, matches, margins=30, mark=("<b>", "</b>")):
          start = None
          end = None
      def substring(self, text, matches, margins=30, mark=("<b>", "</b>")):
          start = None
          end = None
@@ -138,15 +165,21 @@ class CustomSolrInterface(sunburnt.SolrInterface):
                                ((s, e),
                                 (max(0, s - margins), min(totlen, e + margins))),
                                    matches)
                                ((s, e),
                                 (max(0, s - margins), min(totlen, e + margins))),
                                    matches)
+        matches_margins = map(lambda (m, (s, e)):
+                              (m, self.expand_margins(text, s, e)),
+            matches_margins)
+
+            # let's start with the first match
          (start, end) = matches_margins[0][1]
          (start, end) = matches_margins[0][1]
-        matches = []
+        matches = [matches_margins[0][0]]
+
          for (m, (s, e)) in matches_margins[1:]:
              if end < s or start > e:
                  continue
              start = min(start, s)
              end = max(end, e)
              matches.append(m)
          for (m, (s, e)) in matches_margins[1:]:
              if end < s or start > e:
                  continue
              start = min(start, s)
              end = max(end, e)
              matches.append(m)
-            
+
          snip = text[start:end]
          matches.sort(lambda a, b: cmp(b[0], a[0]))
  
          snip = text[start:end]
          matches.sort(lambda a, b: cmp(b[0], a[0]))
  
@@ -154,7 +187,5 @@ class CustomSolrInterface(sunburnt.SolrInterface):
              off = - start
              snip = snip[:e + off] + mark[1] + snip[e + off:]
              snip = snip[:s + off] + mark[0] + snip[s + off:]
              off = - start
              snip = snip[:e + off] + mark[1] + snip[e + off:]
              snip = snip[:s + off] + mark[0] + snip[s + off:]
-            # maybe break on word boundaries
  
          return snip
  
          return snip
-