fnp
/
wolnelektury.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
a87d0e9
)
minor fixes in search
author
Jan Szejko
<janek37@gmail.com>
Wed, 25 Oct 2017 11:07:44 +0000
(13:07 +0200)
committer
Jan Szejko
<janek37@gmail.com>
Wed, 25 Oct 2017 11:07:44 +0000
(13:07 +0200)
doc/schema.xml
patch
|
blob
|
history
src/catalogue/templates/catalogue/book_searched.html
patch
|
blob
|
history
src/search/custom.py
patch
|
blob
|
history
src/search/index.py
patch
|
blob
|
history
src/search/templatetags/search_tags.py
patch
|
blob
|
history
src/search/views.py
patch
|
blob
|
history
diff --git
a/doc/schema.xml
b/doc/schema.xml
index
33886be
..
17868e1
100644
(file)
--- a/
doc/schema.xml
+++ b/
doc/schema.xml
@@
-135,10
+135,12
@@
<field name="authors" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true"/>
<field name="translators" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
<field name="title" type="text_pl" stored="false" indexed="true"/>
<field name="authors" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true"/>
<field name="translators" type="text_general" stored="false" indexed="true" multiValued="true" termPositions="true" termVectors="true" />
<field name="title" type="text_pl" stored="false" indexed="true"/>
- <field name="title_orig" type="lowercase" stored="true" indexed="true"/>
+ <field name="title_orig" type="lowercase" stored="false" indexed="true"/>
<!-- <field name="published_date" type="tdate" stored="false" indexed="true"/>-->
<field name="published_date" type="string" stored="true" indexed="true"/>
<!-- <field name="published_date" type="tdate" stored="false" indexed="true"/>-->
<field name="published_date" type="string" stored="true" indexed="true"/>
+ <field name="metadata" type="lowercase" stored="false" indexed="true"/>
+
<field name="themes" type="lowercase" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
<field name="themes_pl" type="text_pl" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
<field name="header_index" type="int" stored="true" indexed="true"/>
<field name="themes" type="lowercase" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
<field name="themes_pl" type="text_pl" stored="true" indexed="true" termVectors="true" termPositions="true" multiValued="true" />
<field name="header_index" type="int" stored="true" indexed="true"/>
diff --git
a/src/catalogue/templates/catalogue/book_searched.html
b/src/catalogue/templates/catalogue/book_searched.html
index
26b9f61
..
23d4736
100644
(file)
--- a/
src/catalogue/templates/catalogue/book_searched.html
+++ b/
src/catalogue/templates/catalogue/book_searched.html
@@
-21,7
+21,7
@@
{% if hit.snippet %}
{{hit.snippet|safe}}
{% else %}
{% if hit.snippet %}
{{hit.snippet|safe}}
{% else %}
- {{hit.fragment.text|truncatewords_html:15|safe}}
+ {{hit.fragment.text|truncatewords_html:15|safe}} {# bad, now impossible #}
{% endif %}
</a>
</div>
{% endif %}
</a>
</div>
diff --git
a/src/search/custom.py
b/src/search/custom.py
index
dfface9
..
da21e01
100644
(file)
--- a/
src/search/custom.py
+++ b/
src/search/custom.py
@@
-184,5
+184,6
@@
class CustomSolrInterface(sunburnt.SolrInterface):
off = -start
snip = snip[:e + off] + mark[1] + snip[e + off:]
snip = snip[:s + off] + mark[0] + snip[s + off:]
off = -start
snip = snip[:e + off] + mark[1] + snip[e + off:]
snip = snip[:s + off] + mark[0] + snip[s + off:]
+ snip = re.sub('%s[ \t\n]+%s' % (mark[1], mark[0]), " ", snip)
return snip
return snip
diff --git
a/src/search/index.py
b/src/search/index.py
index
1cac82e
..
f9fb4b2
100644
(file)
--- a/
src/search/index.py
+++ b/
src/search/index.py
@@
-272,7
+272,7
@@
class Index(SolrIndex):
]
ignore_content_tags = [
]
ignore_content_tags = [
- 'uwaga', 'extra',
+ 'uwaga', 'extra', 'nota_red',
'zastepnik_tekstu', 'sekcja_asterysk', 'separator_linia', 'zastepnik_wersu',
'didaskalia',
'naglowek_aktu', 'naglowek_sceny', 'naglowek_czesc',
'zastepnik_tekstu', 'sekcja_asterysk', 'separator_linia', 'zastepnik_wersu',
'didaskalia',
'naglowek_aktu', 'naglowek_sceny', 'naglowek_czesc',
@@
-365,8
+365,8
@@
class Index(SolrIndex):
if master is None:
return []
if master is None:
return []
- def walker(node, ignore_tags=()):
- if node.tag not in ignore_tags:
+ def walker(node):
+ if node.tag not in self.ignore_content_tags:
yield node, None, None
if node.text is not None:
yield None, node.text, None
yield node, None, None
if node.text is not None:
yield None, node.text, None
@@
-421,12
+421,6
@@
class Index(SolrIndex):
book.id, doc['header_index'], doc['header_span'], doc.get('fragment_anchor', ''))
return doc
book.id, doc['header_index'], doc['header_span'], doc.get('fragment_anchor', ''))
return doc
- def give_me_utf8(s):
- if isinstance(s, unicode):
- return s.encode('utf-8')
- else:
- return s
-
fragments = {}
snippets = Snippets(book.id).open('w')
try:
fragments = {}
snippets = Snippets(book.id).open('w')
try:
@@
-447,7
+441,7
@@
class Index(SolrIndex):
content.append(text)
handle_text = [all_content]
content.append(text)
handle_text = [all_content]
- for start, text, end in walker(header, ignore_tags=self.ignore_content_tags):
+ for start, text, end in walker(header):
# handle footnotes
if start is not None and start.tag in self.footnote_tags:
footnote = []
# handle footnotes
if start is not None and start.tag in self.footnote_tags:
footnote = []
@@
-819,9
+813,10
@@
class Search(SolrIndex):
text = snippets.get((int(position),
int(length)))
snip = self.index.highlight(text=text, field=field, q=query)
text = snippets.get((int(position),
int(length)))
snip = self.index.highlight(text=text, field=field, q=query)
- snips[idx] = snip
- if snip:
- num -= 1
+ if snip not in snips:
+ snips[idx] = snip
+ if snip:
+ num -= 1
idx += 1
except IOError, e:
idx += 1
except IOError, e:
diff --git
a/src/search/templatetags/search_tags.py
b/src/search/templatetags/search_tags.py
index
c135b80
..
517f18f
100644
(file)
--- a/
src/search/templatetags/search_tags.py
+++ b/
src/search/templatetags/search_tags.py
@@
-32,7
+32,6
@@
def book_searched(context, result):
# We don't need hits which lead to sections but do not have
# snippets.
hits = filter(lambda (idx, h):
# We don't need hits which lead to sections but do not have
# snippets.
hits = filter(lambda (idx, h):
- 'fragment' in h or
result.snippets[idx] is not None,
enumerate(result.hits))
# print "[tmpl: from %d hits selected %d]" % (len(result.hits), len(hits))
result.snippets[idx] is not None,
enumerate(result.hits))
# print "[tmpl: from %d hits selected %d]" % (len(result.hits), len(hits))
@@
-45,8
+44,8
@@
def book_searched(context, result):
continue
snip = result.snippets[idx]
# fix some formattting
continue
snip = result.snippets[idx]
# fix some formattting
- snip = re.subn(r"(^[ \t\n]+|[ \t\n]+$)", u"",
- re.subn(r"[ \t\n]*\n[ \t\n]*", u"\n", snip)[0])[0]
+ snip = re.sub(r"[ \t\n]*\n[ \t\n]*", u"\n", snip)
+ snip = re.sub(r"(^[ \t\n]+|[ \t\n]+$)", u"", snip)
snip = snip.replace("\n", "<br />").replace('---', '—')
hit['snippet'] = snip
snip = snip.replace("\n", "<br />").replace('---', '—')
hit['snippet'] = snip
@@
-54,5
+53,5
@@
def book_searched(context, result):
return {
'request': context['request'],
'book': book,
return {
'request': context['request'],
'book': book,
- 'hits': hits and zip(*hits)[1] or []
+ 'hits': zip(*hits)[1] if hits else []
}
}
diff --git
a/src/search/views.py
b/src/search/views.py
index
d105be6
..
14da6f6
100644
(file)
--- a/
src/search/views.py
+++ b/
src/search/views.py
@@
-222,7
+222,6
@@
def main(request):
text_phrase = filter(ensure_exists, text_phrase)
everywhere = filter(ensure_exists, everywhere)
text_phrase = filter(ensure_exists, text_phrase)
everywhere = filter(ensure_exists, everywhere)
- results = author_results + translator_results + title_results + text_phrase + everywhere
# ensure books do exists & sort them
for res in (author_results, translator_results, title_results, text_phrase, everywhere):
res.sort(reverse=True)
# ensure books do exists & sort them
for res in (author_results, translator_results, title_results, text_phrase, everywhere):
res.sort(reverse=True)
@@
-235,7
+234,7
@@
def main(request):
# #frag = Fragment.objects.get(anchor=anchor)
# return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
# return HttpResponseRedirect(results[0].book.get_absolute_url())
# #frag = Fragment.objects.get(anchor=anchor)
# return HttpResponseRedirect(fragment_hits[0]['fragment'].get_absolute_url())
# return HttpResponseRedirect(results[0].book.get_absolute_url())
- if len(results) == 0:
+ if not (author_results or translator_results or title_results or text_phrase or everywhere):
form = PublishingSuggestForm(initial={"books": query + ", "})
return render_to_response(
'catalogue/search_no_hits.html',
form = PublishingSuggestForm(initial={"books": query + ", "})
return render_to_response(
'catalogue/search_no_hits.html',