--- /dev/null
+from django.contrib.admin import site
+from dcmeta.models import Description, Attr, Schema
+
+# Expose the dcmeta models in the Django admin with the default options.
+site.register([Description, Attr, Schema])
--- /dev/null
+from django.db import models
+import eav.models
+import pdb
+
+# Monkey patches applied to the EAV package when this module is imported:
+
+# Replace eav.models.slugify with the identity function, so that names
+# containing dots, braces and similar characters are left unchanged.
+eav.models.slugify = lambda x: x
+
+# Raise the maximum length of BaseSchema.name to 200 characters
+# (presumably so that full "{namespace-uri}tag" names fit -- TODO confirm).
+eav.models.BaseSchema._meta.get_field_by_name("name")[0].max_length = 200
+
+from django.contrib.auth.models import User
+from django.contrib.contenttypes.generic import GenericRelation, GenericForeignKey
+from django.contrib.contenttypes.models import ContentType
+
+from lxml import etree
+from dcmeta.utils import RDFNS, common_prefixes, NamespaceDescriptor
+import logging
+
+# Module-level logger registered under the "django.dcmeta" name.
+logger = logging.getLogger("django.dcmeta")
+
+class Description(eav.models.BaseEntity):
+    """Collection of meta-data that can be assigned to an entity.
+
+    The described object is referenced through the generic foreign key
+    ``about`` and/or the plain ``about_uri`` string. Properties imported
+    by ``import_rdf`` are stored as EAV attributes whose schema name is
+    the qualified XML tag, i.e. ``{namespace-uri}localname``.
+    """
+    object_id = models.PositiveIntegerField(blank=True, null=True)
+    content_type = models.ForeignKey(ContentType, blank=True, null=True)
+
+    about = GenericForeignKey()
+    about_uri = models.TextField()
+
+    attrs = GenericRelation('Attr', object_id_field="entity_id", content_type_field="entity_type")
+
+    # shortcuts to EAV attributes
+    dublincore = NamespaceDescriptor('http://purl.org/dc/elements/1.1/')
+    marcrel = NamespaceDescriptor('http://www.loc.gov/loc.terms/relators/')
+
+    @classmethod
+    def get_schemata_for_model(cls):
+        """Return all Schema objects."""
+        return Schema.objects.all()
+
+    @classmethod
+    def import_rdf(cls, text):
+        """Create a description from an RDF/XML document.
+
+        Only the first ``/rdf:RDF/rdf:Description`` element is imported.
+        Every child element is saved as a text attribute named after its
+        qualified tag.
+
+        Raises ValueError when the document contains no such element.
+        Returns the new description, re-fetched from the database.
+        """
+        doc = etree.fromstring(text)
+        found = doc.xpath('/rdf:RDF/rdf:Description', namespaces={"rdf": RDFNS.uri})
+
+        if not found:
+            raise ValueError("Invalid document structure.")
+
+        xml_desc = found[0]
+        desc = cls.objects.create(about_uri=xml_desc.get(RDFNS("about")))
+
+        # Restrict the iteration to real elements: XML comments and
+        # processing instructions have no string tag and carry no property.
+        for xml_property in xml_desc.iterchildren(tag=etree.Element):
+            schema, _created = Schema.objects.get_or_create(
+                name=xml_property.tag, datatype=Schema.TYPE_TEXT)
+            schema.save_attr(desc, xml_property.text)
+
+        # Re-fetch the instance (presumably so that it picks up the
+        # attributes saved above -- TODO confirm).
+        return cls.objects.get(pk=desc.pk)
+
+    @staticmethod
+    def _qualified_name(ns, attr):
+        """Build the ``{uri}name`` attribute name, expanding known prefixes."""
+        # A well-known prefix (e.g. "dc") is replaced by its namespace URI;
+        # any other value is used as given, i.e. treated as a URI already.
+        return "{%s}%s" % (common_prefixes.get(ns, ns), attr)
+
+    def __getitem__(self, key):
+        """Return the attribute stored under a ``(namespace, name)`` key.
+
+        ``namespace`` is either a namespace URI or one of the prefixes
+        listed in ``common_prefixes``.
+        Raises ValueError when ``key`` is not a tuple.
+        """
+        if not isinstance(key, tuple):
+            raise ValueError("Key must be a (namespace, name) tuple, got %r" % (key,))
+        ns, attr = key
+        return getattr(self, self._qualified_name(ns, attr))
+
+    def __setitem__(self, key, value):
+        """Set the attribute addressed by a ``(namespace, name)`` key.
+
+        Tuple keys are resolved exactly like in ``__getitem__`` (this is
+        the form NamespaceProxy uses). A plain string key keeps the
+        previous behaviour of setting the ``dc_<key>`` attribute.
+        """
+        if isinstance(key, tuple):
+            ns, attr = key
+            name = self._qualified_name(ns, attr)
+        else:
+            name = "dc_" + key
+        return setattr(self, name, value)
+
+class Schema(eav.models.BaseSchema):
+ """Concrete schema model; adds nothing to eav.models.BaseSchema."""
+ pass
+
+class Choice(eav.models.BaseChoice):
+ """
+ For properties with multiple values.
+ """
+ # Schema this choice belongs to; the reverse accessor on Schema is "choices".
+ schema = models.ForeignKey(Schema, related_name='choices')
+
+class Attr(eav.models.BaseAttribute):
+ """Attribute model built on eav.models.BaseAttribute."""
+ # Schema of this attribute; the reverse accessor on Schema is "attrs".
+ schema = models.ForeignKey(Schema, related_name='attrs')
+ # Optional reference to a Choice (may be blank / NULL).
+ choice = models.ForeignKey(Choice, blank=True, null=True)
--- /dev/null
+# -*- coding: utf-8
+from django.test import TestCase
+from dcmeta.models import Description
+
+class ImportTests(TestCase):
+ """Tests for Description.import_rdf and for attribute access on its result."""
+
+ def test_basic_rdf(self):
+ # A single dc:title must be readable by namespace URI and by prefix,
+ # both on the returned object and after re-fetching it by about_uri.
+ d = Description.import_rdf("""<?xml version="1.0"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <rdf:Description rdf:about="http://wolnelektury.pl/document/test">
+ <dc:title>Simple test resource</dc:title>
+ </rdf:Description>
+</rdf:RDF>""")
+ self.assertEqual(d.attrs.count(), 1)
+ self.assertEqual(d['http://purl.org/dc/elements/1.1/', 'title'], u"Simple test resource")
+
+ # refetch the object
+ d = Description.objects.get(about_uri="http://wolnelektury.pl/document/test")
+
+ self.assertEqual(d.attrs.count(), 1)
+ self.assertEqual(d['http://purl.org/dc/elements/1.1/', 'title'], u"Simple test resource")
+
+ # access by prefix
+ self.assertEqual(d['dc', 'title'], u"Simple test resource")
+
+ def test_very_long_dc_property(self):
+ # The property name contains dots and is longer than 50 characters.
+ NAME = "very_long_prop_name.with_dots.and.other_stuff_longer_then_50_chars"
+ d = Description.import_rdf("""<?xml version="1.0"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:dc="http://purl.org/dc/elements/1.1/">
+ <rdf:Description rdf:about="http://wolnelektury.pl/document/test">
+ <dc:{0}>Simple test resource</dc:{0}>
+ </rdf:Description>
+</rdf:RDF>""".format(NAME))
+
+ self.assertEqual(d.attrs.count(), 1)
+ self.assertEqual(d['dc', NAME], u"Simple test resource")
+
+ def test_namespace_descriptors(self):
+ # Exercises the "dublincore" and "marcrel" shortcuts of Description.
+ d = Description.import_rdf("""<?xml version="1.0"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:marcrel="http://www.loc.gov/loc.terms/relators/">
+ <rdf:Description rdf:about="http://wolnelektury.pl/document/test">
+ <dc:title>Albatros</dc:title>
+ <marcrel:trl>Lange, Antoni</marcrel:trl>
+ <marcrel:edt>Sekuła, Aleksandra</marcrel:edt>
+ </rdf:Description>
+</rdf:RDF>""")
+
+ self.assertEqual(d.dublincore.title, u"Albatros")
+ # NOTE(review): the expected order depends on the order in which the
+ # attrs query returns rows; the proxy does not sort them explicitly.
+ self.assertEqual(list(d.marcrel), [
+ ('trl', u"Lange, Antoni"), ('edt', u"Sekuła, Aleksandra"),
+ ])
+
+ def test_multiple_properties(self):
+ # A property repeated three times (marcrel:edt) is expected back as a list.
+ # NOTE(review): import_rdf registers every schema as TYPE_TEXT -- confirm
+ # that repeated elements really yield a list.
+ d = Description.import_rdf("""<?xml version="1.0"?>
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:dc="http://purl.org/dc/elements/1.1/"
+ xmlns:marcrel="http://www.loc.gov/loc.terms/relators/">
+ <rdf:Description rdf:about="http://wolnelektury.pl/document/test">
+ <dc:title>Albatros</dc:title>
+ <marcrel:trl>Lange, Antoni</marcrel:trl>
+ <marcrel:edt>Sekuła, Aleksandra</marcrel:edt>
+ <marcrel:edt>Niedziałkowska, Marta</marcrel:edt>
+ <marcrel:edt>Dąbek, Katarzyna</marcrel:edt>
+ </rdf:Description>
+</rdf:RDF>""")
+
+ self.assertEqual(d['dc', 'title'], u"Albatros")
+ self.assertEqual(d['marcrel', 'trl'], u"Lange, Antoni")
+ self.assertEqual(d['marcrel', 'edt'], [
+ u"Sekuła, Aleksandra",
+ u"Niedziałkowska, Marta",
+ u"Dąbek, Katarzyna",
+ ])
--- /dev/null
+class XMLNamespace(object):
+    '''A handy structure to represent names in an XML namespace.
+
+    Qualified names are written as ``{uri}localname``.
+    '''
+
+    def __init__(self, uri):
+        self.uri = uri
+
+    def __call__(self, tag):
+        '''Return ``tag`` qualified with this namespace: ``{uri}tag``.'''
+        return '{%s}%s' % (self.uri, tag)
+
+    def __contains__(self, tag):
+        '''Tell whether the qualified name ``tag`` starts with ``{uri}``.'''
+        return tag.startswith('{' + self.uri + '}')
+
+    def __repr__(self):
+        return 'XMLNamespace(%r)' % self.uri
+
+    def __str__(self):
+        return '%s' % self.uri
+
+    def strip(self, qtag):
+        '''Return ``qtag`` without its ``{uri}`` prefix.
+
+        Raises ValueError when ``qtag`` is not in this namespace.
+        '''
+        if qtag not in self:
+            raise ValueError("Tag %s not in namespace %s" % (qtag, self.uri))
+        # Skip the URI plus the two surrounding braces.
+        return qtag[len(self.uri) + 2:]
+
+    @classmethod
+    def split_tag(cls, tag):
+        '''Split ``{uri}local`` into ``(namespace object, local name)``.
+
+        Raises ValueError when ``tag`` does not start with ``{`` (this
+        includes the empty string) or has no closing ``}``.
+        '''
+        # startswith() rather than tag[0]: an empty tag must be reported
+        # as ValueError like any other unqualified name, not IndexError.
+        if not tag.startswith('{'):
+            raise ValueError("Tag %r is not namespace-qualified" % (tag,))
+        end = tag.find('}')
+        if end < 0:
+            raise ValueError("Tag %r has an unterminated namespace part" % (tag,))
+        return cls(tag[1:end]), tag[end + 1:]
+
+    @classmethod
+    def tagname(cls, tag):
+        '''Return only the local name of the qualified ``tag``.'''
+        return cls.split_tag(tag)[1]
+
+
+class EmptyNamespace(XMLNamespace):
+    """The empty namespace: names are used bare, without a ``{uri}`` prefix."""
+
+    def __init__(self):
+        super(EmptyNamespace, self).__init__('')
+
+    def __call__(self, tag):
+        """Return ``tag`` unchanged."""
+        return tag
+
+    def __contains__(self, tag):
+        """Tell whether ``tag`` is un-namespaced."""
+        # __call__ produces bare names, so bare names must be members.
+        # The "{}name" form accepted by the inherited test stays accepted.
+        return not tag.startswith('{') or tag.startswith('{}')
+
+    def strip(self, qtag):
+        """Return the local name; raises ValueError for namespaced tags."""
+        if qtag not in self:
+            raise ValueError("Tag %s not in namespace %s" % (qtag, self.uri))
+        return qtag[2:] if qtag.startswith('{}') else qtag
+
+# Namespaces referenced throughout the project.
+RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#')
+DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/')
+MARCRELNS = XMLNamespace('http://www.loc.gov/loc.terms/relators/')
+
+XINS = XMLNamespace("http://www.w3.org/2001/XInclude")
+XHTMLNS = XMLNamespace("http://www.w3.org/1999/xhtml")
+
+# Namespace URI -> conventional prefix.
+common_uris = dict([
+    (RDFNS.uri, 'rdf'),
+    (DCNS.uri, 'dc'),
+    (MARCRELNS.uri, 'marcrel'),
+])
+
+# Conventional prefix -> namespace URI (the inverse of common_uris).
+common_prefixes = dict((prefix, uri) for uri, prefix in common_uris.items())
+
+class NamespaceProxy(object):
+    """Attribute-style view of a single namespace of a description.
+
+    ``proxy.name`` reads and ``proxy.name = value`` writes
+    ``desc[uri, name]``; iterating yields ``(local name, value)`` pairs
+    for the attributes whose schema name starts with ``{uri}``.
+    """
+
+    def __init__(self, desc, uri):
+        # Bypass the overridden __setattr__ below, which would forward
+        # these assignments to the description instead of storing them.
+        for slot, val in (('uri', uri), ('desc', desc)):
+            object.__setattr__(self, slot, val)
+
+    def __getattr__(self, key):
+        target = object.__getattribute__(self, 'desc')
+        return target[self.uri, key]
+
+    def __setattr__(self, key, value):
+        target = object.__getattribute__(self, 'desc')
+        target[self.uri, key] = value
+
+    def __iter__(self):
+        related = object.__getattribute__(self, 'desc').attrs
+        prefix = "{%s}" % self.uri
+        matching = related.filter(schema__name__startswith=prefix)
+        return ((XMLNamespace.tagname(attr.schema.name), attr.value)
+                for attr in matching)
+
+class NamespaceDescriptor(object):
+    """Read-only descriptor giving access to one namespace of its owner.
+
+    Instance access returns a NamespaceProxy bound to that instance and
+    to the namespace URI passed to the constructor.
+    """
+
+    def __init__(self, nsuri):
+        self.nsuri = nsuri
+
+    def __get__(self, instance, owner):
+        # Access through the class yields the descriptor itself.
+        return self if instance is None else NamespaceProxy(instance, self.nsuri)
+
+    def __set__(self, instance, value):
+        # Assigning to the attribute is not supported.
+        raise ValueError
+
+
+
--- /dev/null
+# Create your views here.
argument points to the version against which this change has been
recorded. Initial text will have a null parent.
- Data contains a reverse diff needed to reproduce the initial document.
+ Data contains a pickled diff needed to reproduce the initial document.
"""
author = models.ForeignKey(User)
patch = models.TextField(blank=True)
ordering = ('created_at',)
def __unicode__(self):
- return "Id: %r, Tree %r, Parent %r, Patch '''\n%s'''" % (self.id, self.tree_id, self.parent_id, self.patch)
+ return u"Id: %r, Tree %r, Parent %r, Patch '''\n%s'''" % (self.id, self.tree_id, self.parent_id, self.patch)
@staticmethod
def make_patch(src, dst):
changes = Change.objects.exclude(parent=None).filter(
tree=self.tree,
created_at__lte=self.created_at).order_by('created_at')
- text = ''
+ text = u''
for change in changes:
text = change.apply_to(text)
return text
- def make_child(self, patch, description):
+ def make_child(self, patch, author, description):
return self.children.create(patch=patch,
- tree=self.tree,
+ tree=self.tree, author=author,
description=description)
- def make_merge_child(self, patch, description):
+ def make_merge_child(self, patch, author, description):
return self.merge_children.create(patch=patch,
- tree=self.tree,
+ tree=self.tree, author=author,
description=description)
def apply_to(self, text):
return mdiff.patch(text, pickle.loads(self.patch.encode('ascii')))
-
- def merge_with(self, other):
+ def merge_with(self, other, author, description=u"Automatic merge."):
assert self.tree_id == other.tree_id # same tree
if other.parent_id == self.pk:
# immediate child
result = ''.join(merge.merge_lines())
patch = self.make_patch(local, result)
return self.children.create(
- patch=patch,
- merge_parent=other, tree=self.tree, description=u"Automatic merge")
+ patch=patch, merge_parent=other, tree=self.tree,
+ author=author, description=description)
class Document(models.Model):
"""
- File in repository.
-
+ File in repository.
"""
creator = models.ForeignKey(User)
head = models.ForeignKey(Change,
help_text=_("Name for this file to display."))
def __unicode__(self):
- return "{0}, HEAD: {1}".format(self.name, self.head_id)
+ return u"{0}, HEAD: {1}".format(self.name, self.head_id)
+
+    @models.permalink
+    def get_absolute_url(self):
+        """Return the permalink triple for the data view of the HEAD revision."""
+        url_kwargs = dict(document_id=self.id, version=self.head_id)
+        return ('dvcs.views.document_data', (), url_kwargs)
def materialize(self, version=None):
if self.head is None:
old_head = self.head
if parent != old_head:
- change = parent.make_merge_child(patch, kwargs.get('description', ''))
+ change = parent.make_merge_child(patch, kwargs['author'], kwargs.get('description', ''))
# not Fast-Forward - perform a merge
- self.head = old_head.merge_with(change)
+ self.head = old_head.merge_with(change, author=kwargs['author'])
else:
- self.head = parent.make_child(patch, kwargs.get('description', ''))
+ self.head = parent.make_child(patch, kwargs['author'], kwargs.get('description', ''))
self.save()
return self.head
from django.test import TestCase
from dvcs.models import Change, Document
+from django.contrib.auth.models import User
class DocumentModelTests(TestCase):
+ def setUp(self):
+ # The tests below pass this user as the document creator and commit author.
+ self.user = User.objects.create_user("tester", "tester@localhost.local")
+
def assertTextEqual(self, given, expected):
return self.assertEqual(given, expected,
"Expected '''%s'''\n differs from text: '''%s'''" % (expected, given)
)
def test_empty_file(self):
- doc = Document.objects.create(name=u"Sample Document")
+ doc = Document.objects.create(name=u"Sample Document", creator=self.user)
self.assert_(doc.head is not None)
self.assertEqual(doc.materialize(), u"")
def test_single_commit(self):
- doc = Document.objects.create(name=u"Sample Document")
- doc.commit(text=u"Ala ma kota", description="Commit #1")
+ doc = Document.objects.create(name=u"Sample Document", creator=self.user)
+ doc.commit(text=u"Ala ma kota", description="Commit #1", author=self.user)
self.assert_(doc.head is not None)
self.assertEqual(doc.change_set.count(), 2)
self.assertEqual(doc.materialize(), u"Ala ma kota")
def test_chained_commits(self):
- doc = Document.objects.create(name=u"Sample Document")
+ doc = Document.objects.create(name=u"Sample Document", creator=self.user)
c1 = doc.commit(description="Commit #1", text=u"""
Line #1
Line #2 is cool
- """)
+ """, author=self.user)
c2 = doc.commit(description="Commit #2", text=u"""
Line #1
Line #2 is hot
- """)
+ """, author=self.user)
c3 = doc.commit(description="Commit #3", text=u"""
Line #1
... is hot
Line #3 ate Line #2
- """)
+ """, author=self.user)
self.assert_(doc.head is not None)
self.assertEqual(doc.change_set.count(), 4)
def test_parallel_commit_noconflict(self):
- doc = Document.objects.create(name=u"Sample Document")
+ doc = Document.objects.create(name=u"Sample Document", creator=self.user)
self.assert_(doc.head is not None)
base = doc.head
base = doc.commit(description="Commit #1", text=u"""
Line #1
Line #2
-""")
+""", author=self.user)
c1 = doc.commit(description="Commit #2", text=u"""
Line #1 is hot
Line #2
-""", parent=base)
+""", parent=base, author=self.user)
self.assertTextEqual(c1.materialize(), u"""
Line #1 is hot
Line #2
Line #1
Line #2
Line #3
-""", parent=base)
+""", parent=base, author=self.user)
self.assertEqual(doc.change_set.count(), 5)
self.assertTextEqual(doc.materialize(), u"""
Line #1 is hot
""")
def test_parallel_commit_conflict(self):
- doc = Document.objects.create(name=u"Sample Document")
+ doc = Document.objects.create(name=u"Sample Document", creator=self.user)
self.assert_(doc.head is not None)
base = doc.head
base = doc.commit(description="Commit #1", text=u"""
Line #1
Line #2
Line #3
-""")
+""", author=self.user)
c1 = doc.commit(description="Commit #2", text=u"""
Line #1
Line #2 is hot
Line #3
-""", parent=base)
+""", parent=base, author=self.user)
c2 = doc.commit(description="Commit #3", text=u"""
Line #1
Line #2 is cool
Line #3
-""", parent=base)
+""", parent=base, author=self.user)
self.assertEqual(doc.change_set.count(), 5)
self.assertTextEqual(doc.materialize(), u"""
Line #1
Line #2 is cool
>>>>>>>
Line #3
+""")
+
+ def test_multiply_parallel_commits(self):
+ # A2 follows A1 on the main line; B1 branches off A1 and C1 off A2, each
+ # changing a different line. The materialized HEAD is expected to hold
+ # all three edits and the document is expected to have 7 changes.
+ doc = Document.objects.create(name=u"Sample Document", creator=self.user)
+ self.assert_(doc.head is not None)
+ c1 = doc.commit(description="Commit A1", text=u"""
+Line #1
+
+Line #2
+
+Line #3
+""", author=self.user)
+ c2 = doc.commit(description="Commit A2", text=u"""
+Line #1 *
+
+Line #2
+
+Line #3
+""", author=self.user)
+ c3 = doc.commit(description="Commit B1", text=u"""
+Line #1
+
+Line #2 **
+
+Line #3
+""", parent=c1, author=self.user)
+ c4 = doc.commit(description="Commit C1", text=u"""
+Line #1 *
+
+Line #2
+
+Line #3 ***
+""", parent=c2, author=self.user)
+ self.assertEqual(doc.change_set.count(), 7)
+ self.assertTextEqual(doc.materialize(), u"""
+Line #1 *
+
+Line #2 **
+
+Line #3 ***
""")
--- /dev/null
+# -*- coding: utf-8
+from django.conf.urls.defaults import *
+
+urlpatterns = patterns(
+    'dvcs.views',
+    # Document data view; the regex also accepts an empty ``version`` part.
+    url(
+        r'^data/(?P<document_id>[^/]+)/(?P<version>.*)$',
+        'document_data',
+        name='storage_document_data',
+    ),
+)
# Create your views here.
from django.views.generic.simple import direct_to_template
+from django import http
from dvcs.models import Document
def document_list(request, template_name="dvcs/document_list.html"):
"documents": Document.objects.all(),
})
+def document_data(request, document_id, version=None):
+    """Return the materialized text of a document as ``text/plain``.
+
+    An empty ``version`` is passed on to ``materialize`` as None.
+    Raises Http404 when no document can be found for ``document_id``.
+    """
+    try:
+        doc = Document.objects.get(pk=document_id)
+    except (Document.DoesNotExist, ValueError):
+        # ValueError covers ids that cannot be converted to a primary key
+        # (presumably an integer one -- the URL pattern accepts any text).
+        raise http.Http404
+    return http.HttpResponse(doc.materialize(version or None), content_type="text/plain")
+
def document_history(request, docid, template_name="dvcs/document_history.html"):
document = Document.objects.get(pk=docid)
return direct_to_template(request, template_name, {
--- /dev/null
+from django.db import models
+
+# Create your models here.
--- /dev/null
+"""
+This file demonstrates two different styles of tests (one doctest and one
+unittest). These will both pass when you run "manage.py test".
+
+Replace these with more appropriate tests for your application.
+"""
+
+from django.test import TestCase
+
+class SimpleTest(TestCase):
+    """Placeholder test case."""
+
+    def test_basic_addition(self):
+        """
+        Tests that 1 + 1 always equals 2.
+        """
+        # assertEqual replaces the deprecated failUnlessEqual alias.
+        self.assertEqual(1 + 1, 2)
+
+# Doctest-style example, stored under the "doctest" key of the module-level
+# __test__ mapping.
+__test__ = {"doctest": """
+Another way to test that 1 + 1 is equal to 2.
+
+>>> 1 + 1 == 2
+True
+"""}
+
--- /dev/null
+# -*- coding: utf-8
+from django.conf.urls.defaults import *
+from django.views.generic.simple import redirect_to
+from django.conf import settings
+
+
+# Pattern for one or more characters out of: space, Polish letters, digits,
+# \w, underscore, dot and hyphen. In this file it is only referenced from a
+# route that is commented out.
+PART = ur"""[ ĄĆĘŁŃÓŚŻŹąćęłńóśżź0-9\w_.-]+"""
+
+# Routes of the new wiki; view names are resolved relative to newwiki.views.
+urlpatterns = patterns('newwiki.views',
+ # The bare prefix redirects (non-permanently) to the document list.
+ url(r'^$', redirect_to, {'url': 'documents/', 'permanent': False}),
+
+ url(r'^documents/$', 'document_list', name='wiki_document_list'),
+ # url(r'^documents/(?P<identifier>.+)$', 'document_list'),
+
+ # NOTE(review): ".+" also matches slashes, so anything after "edit/" is
+ # handed to the view as document_id.
+ url(r'^edit/(?P<document_id>.+)$', 'editor', name="wiki_editor"),
+
+# The routes below are disabled (commented out).
+# url(r'^catalogue/([^/]+)/([^/]+)/$', 'document_list'),
+# url(r'^catalogue/([^/]+)/([^/]+)/([^/]+)$', 'document_list'),
+#
+# url(r'^(?P<name>%s)$' % PART,
+# 'editor', name="wiki_editor"),
+#
+# url(r'^(?P<name>[^/]+)/readonly$',
+# 'editor_readonly', name="wiki_editor_readonly"),
+#
+# url(r'^create/(?P<name>[^/]+)',
+# 'create_missing', name='wiki_create_missing'),
+#
+# url(r'^(?P<directory>[^/]+)/gallery$',
+# 'gallery', name="wiki_gallery"),
+#
+# url(r'^(?P<name>[^/]+)/history$',
+# 'history', name="wiki_history"),
+#
+# url(r'^(?P<name>[^/]+)/text$',
+# 'text', name="wiki_text"),
+#
+# url(r'^(?P<name>[^/]+)/publish$', 'publish', name="wiki_publish"),
+# url(r'^(?P<name>[^/]+)/publish/(?P<version>\d+)$', 'publish', name="wiki_publish"),
+#
+# url(r'^(?P<name>[^/]+)/diff$', 'diff', name="wiki_diff"),
+# url(r'^(?P<name>[^/]+)/tags$', 'add_tag', name="wiki_add_tag"),
+
+
+
+)
--- /dev/null
+from django.conf import settings
+
+from django import http
+
+# Views
+from django.views.generic.simple import direct_to_template
+
+# Decorators
+from django.contrib.auth.decorators import login_required
+from django.views.decorators.http import require_POST, require_GET
+from django.views.decorators.cache import never_cache
+
+# Models
+from django.contrib.contenttypes.models import ContentType
+from dvcs.models import Document
+from dcmeta.models import Description
+
+@never_cache
+def document_list(request):
+    """Render the list of all documents; the response is never cached."""
+    context = {'docs': Document.objects.all()}
+    return direct_to_template(
+        request, 'wiki/document_list.html', extra_context=context)
+
+
+@never_cache
+def editor(request, document_id, template_name='wiki/document_details.html'):
+    """Render the editor page of a document; the response is never cached.
+
+    Raises Http404 when no document can be found for ``document_id``.
+    The meta-data passed to the template is the Description attached to
+    the document, or None when the document has none.
+    """
+    try:
+        doc = Document.objects.get(pk=document_id)
+    except (Document.DoesNotExist, ValueError):
+        # ValueError covers ids that cannot be converted to a primary key
+        # (presumably an integer one -- the URL pattern accepts any text).
+        raise http.Http404
+
+    try:
+        meta_data = Description.objects.get(
+            object_id=doc.pk,
+            content_type=ContentType.objects.get_for_model(doc))
+    except Description.DoesNotExist:
+        # A document without meta-data can still be opened in the editor.
+        meta_data = None
+
+    return direct_to_template(request, template_name, extra_context={
+        'document': doc,
+        'document_name': doc.name,
+        # Both keys receive the same object.
+        'document_info': meta_data,
+        'document_meta': meta_data,
+        # Disabled for now:
+        # 'forms': {
+        #     "text_save": DocumentTextSaveForm(prefix="textsave"),
+        #     "add_tag": DocumentTagForm(prefix="addtag"),
+        # },
+    })
<tbody>
{% for doc in docs %}
<tr>
- <td colspan="3"><a target="_blank" data-id="{{doc}}"
- href="{% url wiki_editor doc %}">{{ doc|wiki_title }}</a></td>
+ <td colspan="3"><a target="_blank" data-id="{{ doc }}"
+ href="{{ doc.get_absolute_url }}">{{ doc }}</a></td>
<!-- placeholder </td> -->
</tr>
{% endfor %}
return decorated
-@never_cache
-def document_list(request):
- return direct_to_template(request, 'wiki/document_list.html', extra_context={
- 'docs': getstorage().all(),
- 'last_docs': sorted(request.session.get("wiki_last_docs", {}).items(),
- key=operator.itemgetter(1), reverse=True),
- })
-
-
-@never_cache
-@normalized_name
-def editor(request, name, template_name='wiki/document_details.html'):
- storage = getstorage()
-
- try:
- document = storage.get(name)
- except DocumentNotFound:
- return http.HttpResponseRedirect(reverse("wiki_create_missing", args=[name]))
-
- access_time = datetime.now()
- last_documents = request.session.get("wiki_last_docs", {})
- last_documents[name] = access_time
-
- if len(last_documents) > MAX_LAST_DOCS:
- oldest_key = min(last_documents, key=last_documents.__getitem__)
- del last_documents[oldest_key]
- request.session['wiki_last_docs'] = last_documents
-
- return direct_to_template(request, template_name, extra_context={
- 'document': document,
- 'document_name': document.name,
- 'document_info': document.info,
- 'document_meta': document.meta,
- 'forms': {
- "text_save": DocumentTextSaveForm(prefix="textsave"),
- "add_tag": DocumentTagForm(prefix="addtag"),
- },
- })
@require_GET
'south',
'sorl.thumbnail',
'filebrowser',
+ 'eav',
'dvcs',
+ 'dcmeta',
'wiki',
'toolbar',
(r'^admin/', include(admin.site.urls)),
url(r'^$', 'django.views.generic.simple.redirect_to', {'url': '/documents/'}),
- url(r'^documents/', include('wiki.urls')),
+ url(r'^wiki/', include('newwiki.urls')),
+ url(r'^storage/', include('dvcs.urls')),
# Static files (should be served by Apache)
url(r'^%s(?P<path>.+)$' % settings.MEDIA_URL[1:], 'django.views.static.serve',
{'document_root': settings.MEDIA_ROOT, 'show_indexes': True}),
url(r'^%s(?P<path>.+)$' % settings.STATIC_URL[1:], 'django.views.static.serve',
{'document_root': settings.STATIC_ROOT, 'show_indexes': True}),
- (r'^documents/', include(wiki.urls)),
- url(r'^$', 'django.views.generic.simple.redirect_to', {'url': '/documents/'}),
+ url(r'^$', 'django.views.generic.simple.redirect_to', {'url': '/wiki/', 'permanent': False}),
)
--- /dev/null
+#!/usr/bin/env python
+import argparse
+import os
+import sys
+import re
+
+from librarian.parser import WLDocument
+
+# Common prefix of the wiki links: the host, an optional "index.php?title=",
+# then "Lektury" followed by ":" or "/".
+# NOTE(review): the dots in the host name and in "index.php" are unescaped,
+# so they match any character.
+_BASE = ur"""http://wiki.wolnepodreczniki.pl/(?:index.php\?title=)?Lektury(?::|/)"""
+
+# Recognized link shapes, with one to four path components after the prefix:
+# title; author/title; author/collection/title; author/collection/part/title.
+ABOUT_PATTERNS = (
+ ur"""%s(?P<title>[^/]+)/?$""" % _BASE,
+ ur"""%s(?P<author>[^/]+)/(?P<title>[^/]+)/?$""" % _BASE,
+ ur"""%s(?P<author>[^/]+)/(?P<collection>[^/]+)/(?P<title>[^/]+)/?$""" % _BASE,
+ ur"""%s(?P<author>[^/]+)/(?P<collection>[^/]+)/(?P<part>[^/]+)/(?P<title>[^/]+)/?$""" % _BASE,
+)
+
+def compile_patterns(patterns):
+    """Lazily yield each pattern source compiled with ``re.UNICODE``."""
+    flags = re.UNICODE
+    for source in patterns:
+        compiled = re.compile(source, flags)
+        yield compiled
+
+def match_first(text, patterns):
+    """Return the groups of the first pattern matching ``text``, or False."""
+    for candidate in patterns:
+        found = candidate.match(text)
+        if found is None:
+            continue
+        return found.groups()
+    return False
+
+
+class Task(object):
+    """Classify the documents of the current directory by their about link.
+
+    After ``run()``:
+      * ``documents`` holds the matched groups of every recognized link,
+      * ``duplicates`` maps such groups to the further files sharing them,
+      * ``unrecognized`` maps file names to the link that matched nothing,
+      * ``invalid`` holds files that could not be read or carry no link.
+    """
+
+    def __init__(self):
+        self.documents = set()
+        self.invalid = set()
+        self.unrecognized = {}
+        self.duplicates = {}
+        self.about_patterns = list(compile_patterns(ABOUT_PATTERNS))
+
+        # Sanity checks of the patterns against three sample links.
+        assert match_first("""http://wiki.wolnepodreczniki.pl/index.php?title=Lektury:Mickiewicz/%C5%9Amier%C4%87_Pu%C5%82kownika/""", self.about_patterns)
+        assert match_first("""http://wiki.wolnepodreczniki.pl/Lektury:Anonim/Ala""", self.about_patterns)
+        assert match_first("""http://wiki.wolnepodreczniki.pl/Lektury:Karpi%C5%84ski/Sielanki/Powr%C3%B3t_z_Warszawy_na_wie%C5%9B""", self.about_patterns)
+
+    def read_file(self, path):
+        """Parse the file at ``path`` with WLDocument."""
+        return WLDocument.from_file(path)
+
+    def run(self):
+        """Classify every entry of the current directory, then print a report."""
+        for filename in os.listdir(u"."):
+            try:
+                self._classify(filename)
+            except Exception:
+                # Best effort: an entry that cannot be read or inspected is
+                # recorded as invalid instead of aborting the whole scan.
+                self.invalid.add(filename)
+
+        self._report()
+
+    def _classify(self, filename):
+        """Read one file and record it in the matching result collection."""
+        doc = self.read_file(filename)
+        about = doc.book_info.about
+        # Convert only a real value: unicode(None) would be u"None", a
+        # truthy string that hides the missing link from the check below.
+        about_link = unicode(about) if about is not None else u""
+        url = doc.book_info.url
+
+        if not about_link:
+            if not url:
+                self.invalid.add(filename)
+            else:
+                self.unrecognized[filename] = url
+            return
+
+        m = match_first(about_link, self.about_patterns)
+        if not m:
+            self.unrecognized[filename] = about_link
+        elif m in self.documents:
+            self.duplicates.setdefault(m, []).append(filename)
+        else:
+            self.documents.add(m)
+
+    def _report(self):
+        """Print the summary followed by one URL per recognized document."""
+        # Single-argument print(...) behaves the same as the print statement.
+        print(u"""\
+{0} correct documents,
+{1} invalid,
+{2} unrecognized,
+\t{unrecognized}
+{3} duplicate names
+\t{duplicates}""".format(
+            len(self.documents),
+            len(self.invalid),
+            len(self.unrecognized),
+            len(self.duplicates),
+            duplicates='\n\t'.join(repr(x) for x in self.duplicates.items()),
+            unrecognized='\n\t'.join(repr(x) for x in self.unrecognized.items())
+        ))
+
+        for doc in self.documents:
+            print(u"http://redakcja.wolnelektury.pl/documents/{0}".format('/'.join(doc).lower()))
+
+
+# Script entry point: classify the files of the current directory and print
+# the report.
+if __name__ == '__main__':
+
+ task = Task()
+ task.run()