- License: [MIT License](http://www.opensource.org/licenses/mit-license.php)
- Type: library (Django application)
- django-chunks
+django-chunks
-------------
- Source: [Google Code](http://code.google.com/p/django-chunks/)
- Authors: Clint Ecker <clintecker@gmail.com>
- Type: library (Django application)
- Notes: Application based on [django-tagging](http://code.google.com/p/django-tagging/), also [MIT](http://www.opensource.org/licenses/mit-license.php) license.
-django-piston (0.2.3rc)
-------------------------
- - http://bitbucket.org/jespern/django-piston/wiki/Home
-markupstring
-------------
- - Source: [ASPN Cookbook](http://code.activestate.com/recipes/389023/)
- - Authors: Thomas Hinkle
- - License: [MIT License](http://code.activestate.com/help/terms/)
- - Type: library
- - Notes: Patched by Marek Stępniowski <marek@stepniowski.com> to accept Unicode strings
-
-
Authors
=======
from newtagging.models import TagBase, tags_updated
from newtagging import managers
from catalogue.fields import JSONField, OverwritingFileField
-from catalogue.utils import create_zip, split_tags
+from catalogue.utils import create_zip, split_tags, truncate_html_words
from catalogue.tasks import touch_tag, index_book
from shutil import copy
from glob import glob
def build_html(self):
- from markupstring import MarkupString
from django.core.files.base import ContentFile
from slughifi import slughifi
from librarian import html
continue
text = fragment.to_string()
- short_text = ''
- markup = MarkupString(text)
- if (len(markup) > 240):
- short_text = unicode(markup[:160])
+ short_text = truncate_html_words(text, 15)
+ if text == short_text:
+ short_text = ''
new_fragment = Fragment.objects.create(anchor=fragment.id, book=self,
text=text, short_text=short_text)
for lang, langname in settings.LANGUAGES:
permanent_cache.delete(cache_key % (self.id, lang))
def get_short_text(self):
    """Return the short version of the fragment.

    Falls back to the full ``text`` when ``short_text`` is empty.
    """
    return self.short_text or self.text
+
def short_html(self):
if self.id:
cache_key = "Fragment.short_html/%d/%s" % (self.id, get_language())
return reverse('catalogue.views.tagged_object_list', args=[
'/'.join((Tag.categories_dict[category], slug))
])
-
-
-@register.filter
-@stringfilter
-def removewholetags(value, tags):
- """Removes a space separated list of [X]HTML tags from the output.
-
- FIXME: It makes the assumption the removed tags aren't nested.
-
- """
- tags = [re.escape(tag) for tag in tags.split()]
- tags_re = u'(%s)' % u'|'.join(tags)
- tag_re = re.compile(ur'<%s[^>]*>.*?</\s*\1\s*>' % tags_re, re.U)
- value = tag_re.sub(u'', value)
- return value
from __future__ import with_statement
import random
+import re
import time
from base64 import urlsafe_b64encode
from django.core.files.uploadedfile import UploadedFile
from django.core.files.base import File
from django.core.files.storage import DefaultStorage
+from django.utils.encoding import force_unicode
from django.utils.hashcompat import sha_constructor
from django.conf import settings
from celery.task import task
def __init__(self, path, *args, **kwargs):
    """Store ``path`` and pass the remaining arguments to the parent class.

    The parent initializer is invoked as a plain statement; ``__init__``
    does not return anything.
    """
    self.path = path
    super(ExistingFile, self).__init__(*args, **kwargs)
def temporary_file_path(self):
    """Return the path that was given to the constructor."""
    return self.path
offset = 0
stop = total_len - len(items)
continue
+
+
def truncate_html_words(s, num, end_text='...'):
    """Truncate HTML to a given number of words, keeping the markup balanced.

    Tags, comments and entities are not counted as words. Tags that are
    still open at the truncation point are closed in the returned string.
    Newlines in the HTML are preserved.

    This is a version of ``django.utils.text.truncate_html_words`` that puts
    no space before ``end_text``. It additionally recognises self-closing
    tags written without a space (``<br/>``) and never tries to close
    comments, doctypes or processing instructions.

    Arguments:
        s -- the HTML to truncate; converted with ``force_unicode``.
        num -- maximum number of words to keep; converted with ``int``.
        end_text -- text appended right after the last kept word when the
            input was actually truncated (default ``'...'``). A false value
            appends nothing.

    Returns ``u''`` when ``num`` is not positive, ``s`` unchanged (as
    unicode) when it holds at most ``num`` words, and the truncated HTML
    otherwise.
    """
    s = force_unicode(s)
    length = int(num)
    if length <= 0:
        return u''
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    # Matches an entity, a tag/comment, or -- in group 1 -- an actual word.
    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    # Groups: closing slash, tag name, self-closing slash. The tag name stops
    # at whitespace, '/' or '>', so '<br/>' yields 'br' plus a self-closing
    # slash instead of a bogus tag called 'br/'.
    re_tag = re.compile(r'<(/)?([^\s/>]+)(?:\s.*?)?(/)?>')
    # Count non-HTML words and keep note of open tags
    pos = 0
    end_text_pos = 0  # stays 0 (false) until the num-th word has been seen
    words = 0
    open_tags = []  # innermost open tag first
    # One word past the limit must be seen to know truncation is needed.
    while words <= length:
        m = re_words.search(s, pos)
        if not m:
            # Checked through whole string
            break
        pos = m.end(0)
        if m.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                end_text_pos = pos
            continue
        if end_text_pos:
            # Tags after the truncation point do not affect the output
            continue
        tag = re_tag.match(m.group(0))
        if not tag:
            # Entities and anything else that is not a tag
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if tagname[0] in '!?':
            # Comments, doctypes and processing instructions are never closed
            continue
        if self_closing or tagname in html4_singlets:
            continue
        if closing_tag:
            if tagname in open_tags:
                # SGML: An end tag closes, back to the matching start tag,
                # all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[open_tags.index(tagname) + 1:]
        else:
            # Add it to the start of the open tags list
            open_tags.insert(0, tagname)
    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s
    pieces = [s[:end_text_pos]]
    if end_text:
        pieces.append(end_text)
    # Close any tags still open, innermost first
    pieces.extend('</%s>' % tag for tag in open_tags)
    return u''.join(pieces)
'book': book,
'request': context.get('request'),
'hits': hits,
+ 'main_link': book.get_absolute_url(),
}
-Subproject commit e394602de9243608d1e99a3de448a75646f1a77f
+Subproject commit 05843e29b4fffcc676da0e67b7a840a24d7b91d4
+++ /dev/null
-# Code taken from ActiveState Python recipes:
-# http://code.activestate.com/recipes/389023/
-#
-# Changed by Marek Stepniowski <marek@stepniowski.com> to handle unicode characters
-import xml.sax
-
-
-class simpleHandler(xml.sax.ContentHandler):
- """A simple handler that provides us with indices of marked up content."""
- def __init__(self):
- self.elements = [] #this will contain a list of elements and their start/end indices
- self.open_elements = [] #this holds info on open elements while we wait for their close
- self.content = ""
-
- def startElement(self, name, attrs):
- if name == 'foobar': return # we require an outer wrapper, which we promptly ignore.
- self.open_elements.append({'name':name,
- 'attrs':attrs.copy(),
- 'start':len(self.content),
- })
-
- def endElement(self, name):
- if name == 'foobar': return # we require an outer wrapper, which we promptly ignore.
- for i in range(len(self.open_elements)):
- e = self.open_elements[i]
- if e['name'] == name:
- # append a (start,end), name, attrs
- self.elements.append(((e['start'], #start position
- len(self.content)), # current (end) position
- e['name'], e['attrs'])
- )
- del self.open_elements[i]
- return
-
- def characters(self, chunk):
- self.content += chunk
-
-
-class MarkupString(unicode):
- """A simple class for dealing with marked up strings. When we are sliced, we return
- valid marked up strings, preserving markup."""
- def __init__(self, string):
- unicode.__init__(self)
- self.handler = simpleHandler()
- xml.sax.parseString((u"<foobar>%s</foobar>" % string).encode('utf-8'), self.handler)
- self.raw = self.handler.content
-
- def __getitem__(self, n):
- return self.__getslice__(n, n + 1)
-
- def __getslice__(self, s, e):
- # only include relevant elements
- if not e or e > len(self.raw): e = len(self.raw)
- elements = filter(lambda tp: (tp[0][1] >= s and # end after the start...
- tp[0][0] <= e # and start before the end
- ),
- self.handler.elements)
- ends = {}
- starts = {}
- for el in elements:
- # cycle through elements that effect our slice and keep track of
- # where their start and end tags should go.
- pos = el[0]
- name = el[1]
- attrs = el[2]
- # write our start tag <stag att="val"...>
- stag = "<%s" % name
- for k, v in attrs.items(): stag += " %s=%s" % (k, xml.sax.saxutils.quoteattr(v))
- stag += ">"
- etag = "</%s>" % name # simple end tag
- spos = pos[0]
- epos = pos[1]
- if spos < s: spos = s
- if epos > e: epos = e
- if epos != spos: # we don't care about tags that don't markup any text
- if not starts.has_key(spos): starts[spos] = []
- starts[spos].append(stag)
- if not ends.has_key(epos): ends[epos] = []
- ends[epos].append(etag)
- outbuf = "" # our actual output string
- for pos in range(s, e): # we move through positions
- char = self.raw[pos]
- if ends.has_key(pos): # if there are endtags to insert...
- for et in ends[pos]: outbuf += et
- if starts.has_key(pos): # if there are start tags to insert
- mystarts = starts[pos]
- # reverse these so the order works out,e.g. <i><b><u></u></b></i>
- mystarts.reverse()
- for st in mystarts: outbuf += st
- outbuf += char
- if ends.has_key(e):
- for et in ends[e]: outbuf += et
- return MarkupString(outbuf)
-
- def __len__(self):
- return len(self.raw)
-
<p>{% trans "In fragment" %}
{% if hit.themes_hit %}{% trans ", for themes:" %}{% for t in hit.themes_hit %}{{t.name}} {% endfor %}{% endif %}
</p>
- <a href="{{hit.fragment.get_absolute_url}}">{{hit.fragment.short_text|safe}}</a>
+ <a href="{{hit.fragment.get_absolute_url}}">{{hit.fragment.get_short_text|safe}}</a>
</div>
{% endif %}
{% endif %}
{% if fragment %}
<a href="{{ fragment.get_absolute_url }}" class="cite">
<blockquote class="cite-body">
- {{ fragment.text|removewholetags:"a"|truncatewords_html:15|safe }}
+ {{ fragment.get_short_text|safe }}
</blockquote>
<p class="mono">{{ fragment.book.pretty_title }}</p>
</a>