From: Łukasz Rekucki Date: Tue, 22 Jun 2010 15:32:18 +0000 (+0200) Subject: Added DCMeta - EAV based application to represent document meta-data. Started to... X-Git-Url: https://git.mdrn.pl/redakcja.git/commitdiff_plain/refs/heads/backend-rewrite Added DCMeta - EAV based application to represent document meta-data. Started to rewrite the wiki app. --- diff --git a/apps/dcmeta/__init__.py b/apps/dcmeta/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/apps/dcmeta/admin.py b/apps/dcmeta/admin.py new file mode 100644 index 00000000..26777f84 --- /dev/null +++ b/apps/dcmeta/admin.py @@ -0,0 +1,6 @@ +from django.contrib.admin import site +from dcmeta.models import Description, Attr, Schema + +site.register(Description) +site.register(Attr) +site.register(Schema) diff --git a/apps/dcmeta/models.py b/apps/dcmeta/models.py new file mode 100644 index 00000000..eaa6e864 --- /dev/null +++ b/apps/dcmeta/models.py @@ -0,0 +1,85 @@ +from django.db import models +import eav.models +import pdb + +# Some monkey patches to EAV: + +# Allow dots & stuff +eav.models.slugify = lambda x: x + +# Allow more characters +eav.models.BaseSchema._meta.get_field_by_name("name")[0].max_length = 200 + +from django.contrib.auth.models import User +from django.contrib.contenttypes.generic import GenericRelation, GenericForeignKey +from django.contrib.contenttypes.models import ContentType + +from lxml import etree +from dcmeta.utils import RDFNS, common_prefixes, NamespaceDescriptor +import logging + +logger = logging.getLogger("django.dcmeta") + +class Description(eav.models.BaseEntity): + """Collection of meta-data that can be assigned to an entity.""" + object_id = models.PositiveIntegerField(blank=True, null=True) + content_type = models.ForeignKey(ContentType, blank=True, null=True) + + about = GenericForeignKey() + about_uri = models.TextField() + + attrs = GenericRelation('Attr', object_id_field="entity_id", content_type_field="entity_type") + + # shortcuts to EAV attributes + dublincore = NamespaceDescriptor('http://purl.org/dc/elements/1.1/') + marcrel = NamespaceDescriptor('http://www.loc.gov/loc.terms/relators/') + + @classmethod + def get_schemata_for_model(self): + return Schema.objects.all() + + @classmethod + def import_rdf(cls, text): + doc = etree.fromstring(text) + xml_desc = doc.xpath('/rdf:RDF/rdf:Description', namespaces={"rdf": RDFNS.uri}) + + if not xml_desc: + raise ValueError("Invalid document structure.") + + xml_desc = xml_desc[0] + + desc = Description.objects.create(about_uri=xml_desc.get(RDFNS("about"))) + + for xml_property in xml_desc.iterchildren(): + property, _created = Schema.objects.get_or_create( + name=xml_property.tag, datatype=Schema.TYPE_TEXT) + property.save_attr(desc, xml_property.text) + + desc = Description.objects.get(pk=desc.pk) + return desc + + def __getitem__(self, key): + if not isinstance(key, tuple): + raise ValueError + ns, attr = key + + if ns in common_prefixes: # URI given, value stored as prefix + ns = common_prefixes[ns] + + return getattr(self, "{%s}%s" % (ns, attr)) + + def __setitem__(self, key, value): + return setattr(self, "dc_" + key, value) + +class Schema(eav.models.BaseSchema): + pass + +class Choice(eav.models.BaseChoice): + """ + For properties with multiply values. + """ + schema = models.ForeignKey(Schema, related_name='choices') + +class Attr(eav.models.BaseAttribute): + schema = models.ForeignKey(Schema, related_name='attrs') + choice = models.ForeignKey(Choice, blank=True, null=True) diff --git a/apps/dcmeta/tests.py b/apps/dcmeta/tests.py new file mode 100644 index 00000000..d30d1750 --- /dev/null +++ b/apps/dcmeta/tests.py @@ -0,0 +1,77 @@ +# -*- coding: utf-8 +from django.test import TestCase +from dcmeta.models import Description + +class ImportTests(TestCase): + + def test_basic_rdf(self): + d = Description.import_rdf(""" + + + Simple test resource + +""") + self.assertEqual(d.attrs.count(), 1) + self.assertEqual(d['http://purl.org/dc/elements/1.1/', 'title'], u"Simple test resource") + + # refetch the object + d = Description.objects.get(about_uri="http://wolnelektury.pl/document/test") + + self.assertEqual(d.attrs.count(), 1) + self.assertEqual(d['http://purl.org/dc/elements/1.1/', 'title'], u"Simple test resource") + + # access by prefix + self.assertEqual(d['dc', 'title'], u"Simple test resource") + + def test_very_long_dc_property(self): + NAME = "very_long_prop_name.with_dots.and.other_stuff_longer_then_50_chars" + d = Description.import_rdf(""" + + + Simple test resource + +""".format(NAME)) + + self.assertEqual(d.attrs.count(), 1) + self.assertEqual(d['dc', NAME], u"Simple test resource") + + def test_namespace_descriptors(self): + d = Description.import_rdf(""" + + + Albatros + Lange, Antoni + Sekuła, Aleksandra + +""") + + self.assertEqual(d.dublincore.title, u"Albatros") + self.assertEqual(list(d.marcrel), [ + ('trl', u"Lange, Antoni"), ('edt', u"Sekuła, Aleksandra"), + ]) + + def test_multiple_properties(self): + d = Description.import_rdf(""" + + + Albatros + Lange, Antoni + Sekuła, Aleksandra + Niedziałkowska, Marta + Dąbek, Katarzyna + +""") + + self.assertEqual(d['dc', 'title'], u"Albatros") + self.assertEqual(d['marcrel', 'trl'], u"Lange, Antoni") + self.assertEqual(d['marcrel', 'edt'], [ + u"Sekuła, Aleksandra", + u"Niedziałkowska, Marta", + u"Dąbek, Katarzyna", + ]) diff --git a/apps/dcmeta/utils.py b/apps/dcmeta/utils.py new file mode 100644 index 00000000..60a24823 --- /dev/null +++ b/apps/dcmeta/utils.py @@ -0,0 +1,90 @@ +class XMLNamespace(object): + '''A handy structure to represent names in an XML namespace.''' + + def __init__(self, uri): + self.uri = uri + + def __call__(self, tag): + return '{%s}%s' % (self.uri, tag) + + def __contains__(self, tag): + return tag.startswith('{' + self.uri + '}') + + def __repr__(self): + return 'XMLNamespace(%r)' % self.uri + + def __str__(self): + return '%s' % self.uri + + def strip(self, qtag): + if qtag not in self: + raise ValueError("Tag %s not in namespace %s" % (qtag, self.uri)) + return qtag[len(self.uri) + 2:] + + @classmethod + def split_tag(cls, tag): + if '{' != tag[0]: + raise ValueError + end = tag.find('}') + if end < 0: + raise ValueError + return cls(tag[1:end]), tag[end + 1:] + + @classmethod + def tagname(cls, tag): + return cls.split_tag(tag)[1] + + +class EmptyNamespace(XMLNamespace): + def __init__(self): + super(EmptyNamespace, self).__init__('') + + def __call__(self, tag): + return tag + +# some common namespaces we use +RDFNS = XMLNamespace('http://www.w3.org/1999/02/22-rdf-syntax-ns#') +DCNS = XMLNamespace('http://purl.org/dc/elements/1.1/') +MARCRELNS = XMLNamespace('http://www.loc.gov/loc.terms/relators/') + +XINS = XMLNamespace("http://www.w3.org/2001/XInclude") +XHTMLNS = XMLNamespace("http://www.w3.org/1999/xhtml") + +common_uris = { + RDFNS.uri: 'rdf', + DCNS.uri: 'dc', + MARCRELNS.uri: 'marcrel', +} + +common_prefixes = dict((i[1], i[0]) for i in common_uris.items()) + +class NamespaceProxy(object): + + def __init__(self, desc, uri): + object.__setattr__(self, 'uri', uri) + object.__setattr__(self, 'desc', desc) + + def __getattr__(self, key): + return object.__getattribute__(self, 'desc')[self.uri, key] + + def __setattr__(self, key, value): + object.__getattribute__(self, 'desc')[self.uri, key] = value + + def __iter__(self): + return ((XMLNamespace.tagname(attr.schema.name), attr.value) for attr in object.__getattribute__(self, 'desc').attrs.filter(schema__name__startswith="{%s}" % self.uri)) + +class NamespaceDescriptor(object): + + def __init__(self, nsuri): + self.nsuri = nsuri + + def __get__(self, instance, owner): + if instance is None: + return self + return NamespaceProxy(instance, self.nsuri) + + def __set__(self, instance, value): + raise ValueError + + + diff --git a/apps/dcmeta/views.py b/apps/dcmeta/views.py new file mode 100644 index 00000000..60f00ef0 --- /dev/null +++ b/apps/dcmeta/views.py @@ -0,0 +1 @@ +# Create your views here. diff --git a/apps/dvcs/models.py b/apps/dvcs/models.py index 01ab0389..47e7c26d 100644 --- a/apps/dvcs/models.py +++ b/apps/dvcs/models.py @@ -10,7 +10,7 @@ class Change(models.Model): argument points to the version against which this change has been recorded. Initial text will have a null parent. - Data contains a reverse diff needed to reproduce the initial document. + Data contains a pickled diff needed to reproduce the initial document. """ author = models.ForeignKey(User) patch = models.TextField(blank=True) @@ -31,7 +31,7 @@ class Change(models.Model): ordering = ('created_at',) def __unicode__(self): - return "Id: %r, Tree %r, Parent %r, Patch '''\n%s'''" % (self.id, self.tree_id, self.parent_id, self.patch) + return u"Id: %r, Tree %r, Parent %r, Patch '''\n%s'''" % (self.id, self.tree_id, self.parent_id, self.patch) @staticmethod def make_patch(src, dst): @@ -41,26 +41,25 @@ class Change(models.Model): changes = Change.objects.exclude(parent=None).filter( tree=self.tree, created_at__lte=self.created_at).order_by('created_at') - text = '' + text = u'' for change in changes: text = change.apply_to(text) return text - def make_child(self, patch, description): + def make_child(self, patch, author, description): return self.children.create(patch=patch, - tree=self.tree, + tree=self.tree, author=author, description=description) - def make_merge_child(self, patch, description): + def make_merge_child(self, patch, author, description): return self.merge_children.create(patch=patch, - tree=self.tree, + tree=self.tree, author=author, description=description) def apply_to(self, text): return mdiff.patch(text, pickle.loads(self.patch.encode('ascii'))) - - def merge_with(self, other): + def merge_with(self, other, author, description=u"Automatic merge."): assert self.tree_id == other.tree_id # same tree if other.parent_id == self.pk: # immediate child @@ -74,14 +73,13 @@ class Change(models.Model): result = ''.join(merge.merge_lines()) patch = self.make_patch(local, result) return self.children.create( - patch=patch, - merge_parent=other, tree=self.tree, description=u"Automatic merge") + patch=patch, merge_parent=other, tree=self.tree, + author=author, description=description) class Document(models.Model): """ - File in repository. - + File in repository. """ creator = models.ForeignKey(User) head = models.ForeignKey(Change, @@ -93,7 +91,14 @@ class Document(models.Model): help_text=_("Name for this file to display.")) def __unicode__(self): - return "{0}, HEAD: {1}".format(self.name, self.head_id) + return u"{0}, HEAD: {1}".format(self.name, self.head_id) + + @models.permalink + def get_absolute_url(self): + return ('dvcs.views.document_data', (), { + 'document_id': self.id, + 'version': self.head_id, + }) def materialize(self, version=None): if self.head is None: @@ -123,11 +128,11 @@ class Document(models.Model): old_head = self.head if parent != old_head: - change = parent.make_merge_child(patch, kwargs.get('description', '')) + change = parent.make_merge_child(patch, kwargs['author'], kwargs.get('description', '')) # not Fast-Forward - perform a merge - self.head = old_head.merge_with(change) + self.head = old_head.merge_with(change, author=kwargs['author']) else: - self.head = parent.make_child(patch, kwargs.get('description', '')) + self.head = parent.make_child(patch, kwargs['author'], kwargs.get('description', '')) self.save() return self.head diff --git a/apps/dvcs/tests.py b/apps/dvcs/tests.py index af19d782..0c712957 100644 --- a/apps/dvcs/tests.py +++ b/apps/dvcs/tests.py @@ -1,40 +1,44 @@ from django.test import TestCase from dvcs.models import Change, Document +from django.contrib.auth.models import User class DocumentModelTests(TestCase): + def setUp(self): + self.user = User.objects.create_user("tester", "tester@localhost.local") + def assertTextEqual(self, given, expected): return self.assertEqual(given, expected, "Expected '''%s'''\n differs from text: '''%s'''" % (expected, given) ) def test_empty_file(self): - doc = Document.objects.create(name=u"Sample Document") + doc = Document.objects.create(name=u"Sample Document", creator=self.user) self.assert_(doc.head is not None) self.assertEqual(doc.materialize(), u"") def test_single_commit(self): - doc = Document.objects.create(name=u"Sample Document") - doc.commit(text=u"Ala ma kota", description="Commit #1") + doc = Document.objects.create(name=u"Sample Document", creator=self.user) + doc.commit(text=u"Ala ma kota", description="Commit #1", author=self.user) self.assert_(doc.head is not None) self.assertEqual(doc.change_set.count(), 2) self.assertEqual(doc.materialize(), u"Ala ma kota") def test_chained_commits(self): - doc = Document.objects.create(name=u"Sample Document") + doc = Document.objects.create(name=u"Sample Document", creator=self.user) c1 = doc.commit(description="Commit #1", text=u""" Line #1 Line #2 is cool - """) + """, author=self.user) c2 = doc.commit(description="Commit #2", text=u""" Line #1 Line #2 is hot - """) + """, author=self.user) c3 = doc.commit(description="Commit #3", text=u""" Line #1 ... is hot Line #3 ate Line #2 - """) + """, author=self.user) self.assert_(doc.head is not None) self.assertEqual(doc.change_set.count(), 4) @@ -59,18 +63,18 @@ class DocumentModelTests(TestCase): def test_parallel_commit_noconflict(self): - doc = Document.objects.create(name=u"Sample Document") + doc = Document.objects.create(name=u"Sample Document", creator=self.user) self.assert_(doc.head is not None) base = doc.head base = doc.commit(description="Commit #1", text=u""" Line #1 Line #2 -""") +""", author=self.user) c1 = doc.commit(description="Commit #2", text=u""" Line #1 is hot Line #2 -""", parent=base) +""", parent=base, author=self.user) self.assertTextEqual(c1.materialize(), u""" Line #1 is hot Line #2 @@ -79,7 +83,7 @@ class DocumentModelTests(TestCase): Line #1 Line #2 Line #3 -""", parent=base) +""", parent=base, author=self.user) self.assertEqual(doc.change_set.count(), 5) self.assertTextEqual(doc.materialize(), u""" Line #1 is hot @@ -88,25 +92,25 @@ class DocumentModelTests(TestCase): """) def test_parallel_commit_conflict(self): - doc = Document.objects.create(name=u"Sample Document") + doc = Document.objects.create(name=u"Sample Document", creator=self.user) self.assert_(doc.head is not None) base = doc.head base = doc.commit(description="Commit #1", text=u""" Line #1 Line #2 Line #3 -""") +""", author=self.user) c1 = doc.commit(description="Commit #2", text=u""" Line #1 Line #2 is hot Line #3 -""", parent=base) +""", parent=base, author=self.user) c2 = doc.commit(description="Commit #3", text=u""" Line #1 Line #2 is cool Line #3 -""", parent=base) +""", parent=base, author=self.user) self.assertEqual(doc.change_set.count(), 5) self.assertTextEqual(doc.materialize(), u""" Line #1 @@ -116,5 +120,45 @@ Line #2 is hot Line #2 is cool >>>>>>> Line #3 +""") + + def test_multiply_parallel_commits(self): + doc = Document.objects.create(name=u"Sample Document", creator=self.user) + self.assert_(doc.head is not None) + c1 = doc.commit(description="Commit A1", text=u""" +Line #1 + +Line #2 + +Line #3 +""", author=self.user) + c2 = doc.commit(description="Commit A2", text=u""" +Line #1 * + +Line #2 + +Line #3 +""", author=self.user) + c3 = doc.commit(description="Commit B1", text=u""" +Line #1 + +Line #2 ** + +Line #3 +""", parent=c1, author=self.user) + c4 = doc.commit(description="Commit C1", text=u""" +Line #1 * + +Line #2 + +Line #3 *** +""", parent=c2, author=self.user) + self.assertEqual(doc.change_set.count(), 7) + self.assertTextEqual(doc.materialize(), u""" +Line #1 * + +Line #2 ** + +Line #3 *** """) diff --git a/apps/dvcs/urls.py b/apps/dvcs/urls.py new file mode 100644 index 00000000..d1e1e296 --- /dev/null +++ b/apps/dvcs/urls.py @@ -0,0 +1,6 @@ +# -*- coding: utf-8 +from django.conf.urls.defaults import * + +urlpatterns = patterns('dvcs.views', + url(r'^data/(?P[^/]+)/(?P.*)$', 'document_data', name='storage_document_data'), +) diff --git a/apps/dvcs/views.py b/apps/dvcs/views.py index 03b258a0..7918e96c 100644 --- a/apps/dvcs/views.py +++ b/apps/dvcs/views.py @@ -1,5 +1,6 @@ # Create your views here. from django.views.generic.simple import direct_to_template +from django import http from dvcs.models import Document def document_list(request, template_name="dvcs/document_list.html"): @@ -7,6 +8,10 @@ def document_list(request, template_name="dvcs/document_list.html"): "documents": Document.objects.all(), }) +def document_data(request, document_id, version=None): + doc = Document.objects.get(pk=document_id) + return http.HttpResponse(doc.materialize(version or None), content_type="text/plain") + def document_history(request, docid, template_name="dvcs/document_history.html"): document = Document.objects.get(pk=docid) return direct_to_template(request, template_name, { diff --git a/apps/newwiki/__init__.py b/apps/newwiki/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/apps/newwiki/models.py b/apps/newwiki/models.py new file mode 100644 index 00000000..71a83623 --- /dev/null +++ b/apps/newwiki/models.py @@ -0,0 +1,3 @@ +from django.db import models + +# Create your models here. diff --git a/apps/newwiki/tests.py b/apps/newwiki/tests.py new file mode 100644 index 00000000..2247054b --- /dev/null +++ b/apps/newwiki/tests.py @@ -0,0 +1,23 @@ +""" +This file demonstrates two different styles of tests (one doctest and one +unittest). These will both pass when you run "manage.py test". + +Replace these with more appropriate tests for your application. +""" + +from django.test import TestCase + +class SimpleTest(TestCase): + def test_basic_addition(self): + """ + Tests that 1 + 1 always equals 2. + """ + self.failUnlessEqual(1 + 1, 2) + +__test__ = {"doctest": """ +Another way to test that 1 + 1 is equal to 2. + +>>> 1 + 1 == 2 +True +"""} + diff --git a/apps/newwiki/urls.py b/apps/newwiki/urls.py new file mode 100644 index 00000000..044ea380 --- /dev/null +++ b/apps/newwiki/urls.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 +from django.conf.urls.defaults import * +from django.views.generic.simple import redirect_to +from django.conf import settings + + +PART = ur"""[ ĄĆĘŁŃÓŚŻŹąćęłńóśżź0-9\w_.-]+""" + +urlpatterns = patterns('newwiki.views', + url(r'^$', redirect_to, {'url': 'documents/', 'permanent': False}), + + url(r'^documents/$', 'document_list', name='wiki_document_list'), + # url(r'^documents/(?P.+)$', 'document_list'), + + url(r'^edit/(?P.+)$', 'editor', name="wiki_editor"), + +# url(r'^catalogue/([^/]+)/([^/]+)/$', 'document_list'), +# url(r'^catalogue/([^/]+)/([^/]+)/([^/]+)$', 'document_list'), +# +# url(r'^(?P%s)$' % PART, +# 'editor', name="wiki_editor"), +# +# url(r'^(?P[^/]+)/readonly$', +# 'editor_readonly', name="wiki_editor_readonly"), +# +# url(r'^create/(?P[^/]+)', +# 'create_missing', name='wiki_create_missing'), +# +# url(r'^(?P[^/]+)/gallery$', +# 'gallery', name="wiki_gallery"), +# +# url(r'^(?P[^/]+)/history$', +# 'history', name="wiki_history"), +# +# url(r'^(?P[^/]+)/text$', +# 'text', name="wiki_text"), +# +# url(r'^(?P[^/]+)/publish$', 'publish', name="wiki_publish"), +# url(r'^(?P[^/]+)/publish/(?P\d+)$', 'publish', name="wiki_publish"), +# +# url(r'^(?P[^/]+)/diff$', 'diff', name="wiki_diff"), +# url(r'^(?P[^/]+)/tags$', 'add_tag', name="wiki_add_tag"), + + + +) diff --git a/apps/newwiki/views.py b/apps/newwiki/views.py new file mode 100644 index 00000000..e4bbab22 --- /dev/null +++ b/apps/newwiki/views.py @@ -0,0 +1,46 @@ +from django.conf import settings + +from django import http + +# Views +from django.views.generic.simple import direct_to_template + +# Decorators +from django.contrib.auth.decorators import login_required +from django.views.decorators.http import require_POST, require_GET +from django.views.decorators.cache import never_cache + +# Models +from django.contrib.contenttypes.models import ContentType +from dvcs.models import Document +from dcmeta.models import Description + +@never_cache +def document_list(request): + return direct_to_template(request, + 'wiki/document_list.html', extra_context={ + 'docs': Document.objects.all(), + }) + + +@never_cache +def editor(request, document_id, template_name='wiki/document_details.html'): + + try: + doc = Document.objects.get(pk=document_id) + except Document.DoesNotExist: + raise http.Http404 + + meta_data = Description.objects.get(object_id=document_id, + content_type=ContentType.objects.get_for_model(doc)) + + return direct_to_template(request, template_name, extra_context={ + 'document': doc, + 'document_name': doc.name, + 'document_info': meta_data, + 'document_meta': meta_data, +# 'forms': { +# "text_save": DocumentTextSaveForm(prefix="textsave"), +# "add_tag": DocumentTagForm(prefix="addtag"), +# }, + }) diff --git a/apps/wiki/templates/wiki/document_list.html b/apps/wiki/templates/wiki/document_list.html index 6853801f..ec4116e5 100644 --- a/apps/wiki/templates/wiki/document_list.html +++ b/apps/wiki/templates/wiki/document_list.html @@ -33,8 +33,8 @@ $(function() { {% for doc in docs %} - {{ doc|wiki_title }} + {{ doc }} {% endfor %} diff --git a/apps/wiki/views.py b/apps/wiki/views.py index a1c3097c..451034dc 100644 --- a/apps/wiki/views.py +++ b/apps/wiki/views.py @@ -49,44 +49,6 @@ def normalized_name(view): return decorated -@never_cache -def document_list(request): - return direct_to_template(request, 'wiki/document_list.html', extra_context={ - 'docs': getstorage().all(), - 'last_docs': sorted(request.session.get("wiki_last_docs", {}).items(), - key=operator.itemgetter(1), reverse=True), - }) - - -@never_cache -@normalized_name -def editor(request, name, template_name='wiki/document_details.html'): - storage = getstorage() - - try: - document = storage.get(name) - except DocumentNotFound: - return http.HttpResponseRedirect(reverse("wiki_create_missing", args=[name])) - - access_time = datetime.now() - last_documents = request.session.get("wiki_last_docs", {}) - last_documents[name] = access_time - - if len(last_documents) > MAX_LAST_DOCS: - oldest_key = min(last_documents, key=last_documents.__getitem__) - del last_documents[oldest_key] - request.session['wiki_last_docs'] = last_documents - - return direct_to_template(request, template_name, extra_context={ - 'document': document, - 'document_name': document.name, - 'document_info': document.info, - 'document_meta': document.meta, - 'forms': { - "text_save": DocumentTextSaveForm(prefix="textsave"), - "add_tag": DocumentTagForm(prefix="addtag"), - }, - }) @require_GET diff --git a/redakcja/settings/common.py b/redakcja/settings/common.py index 846289ae..ca38a93a 100644 --- a/redakcja/settings/common.py +++ b/redakcja/settings/common.py @@ -116,7 +116,9 @@ INSTALLED_APPS = ( 'south', 'sorl.thumbnail', 'filebrowser', + 'eav', 'dvcs', + 'dcmeta', 'wiki', 'toolbar', diff --git a/redakcja/urls.py b/redakcja/urls.py index 191bcc2a..af85e5ce 100644 --- a/redakcja/urls.py +++ b/redakcja/urls.py @@ -19,7 +19,8 @@ urlpatterns = patterns('', (r'^admin/', include(admin.site.urls)), url(r'^$', 'django.views.generic.simple.redirect_to', {'url': '/documents/'}), - url(r'^documents/', include('wiki.urls')), + url(r'^wiki/', include('newwiki.urls')), + url(r'^storage/', include('dvcs.urls')), # Static files (should be served by Apache) url(r'^%s(?P.+)$' % settings.MEDIA_URL[1:], 'django.views.static.serve', @@ -28,7 +29,6 @@ urlpatterns = patterns('', {'document_root': settings.MEDIA_ROOT, 'show_indexes': True}), url(r'^%s(?P.+)$' % settings.STATIC_URL[1:], 'django.views.static.serve', {'document_root': settings.STATIC_ROOT, 'show_indexes': True}), - (r'^documents/', include(wiki.urls)), - url(r'^$', 'django.views.generic.simple.redirect_to', {'url': '/documents/'}), + url(r'^$', 'django.views.generic.simple.redirect_to', {'url': '/wiki/', 'permanent': False}), ) diff --git a/scripts/fix_links.py b/scripts/fix_links.py new file mode 100755 index 00000000..6f7bdb26 --- /dev/null +++ b/scripts/fix_links.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python +import argparse +import os +import sys +import re + +from librarian.parser import WLDocument + +_BASE = ur"""http://wiki.wolnepodreczniki.pl/(?:index.php\?title=)?Lektury(?::|/)""" + +ABOUT_PATTERNS = ( + ur"""%s(?P[^/]+)/?$""" % _BASE, + ur"""%s(?P<author>[^/]+)/(?P<title>[^/]+)/?$""" % _BASE, + ur"""%s(?P<author>[^/]+)/(?P<collection>[^/]+)/(?P<title>[^/]+)/?$""" % _BASE, + ur"""%s(?P<author>[^/]+)/(?P<collection>[^/]+)/(?P<part>[^/]+)/(?P<title>[^/]+)/?$""" % _BASE, +) + +def compile_patterns(patterns): + for p in patterns: + yield re.compile(p, re.UNICODE) + +def match_first(text, patterns): + for pattern in patterns: + m = pattern.match(text) + if m is not None: + return m.groups() + return False + + +class Task(object): + + def __init__(self): + self.documents = set() + self.invalid = set() + self.unrecognized = {} + self.duplicates = {} + self.about_patterns = list(compile_patterns(ABOUT_PATTERNS)) + + assert match_first("""http://wiki.wolnepodreczniki.pl/index.php?title=Lektury:Mickiewicz/%C5%9Amier%C4%87_Pu%C5%82kownika/""", self.about_patterns) + assert match_first("""http://wiki.wolnepodreczniki.pl/Lektury:Anonim/Ala""", self.about_patterns) + assert match_first("""http://wiki.wolnepodreczniki.pl/Lektury:Karpi%C5%84ski/Sielanki/Powr%C3%B3t_z_Warszawy_na_wie%C5%9B""", self.about_patterns) + + def read_file(self, path): + return WLDocument.from_file(path) + + def run(self): + for file in os.listdir(u"."): + try: + doc = self.read_file(file) + about_link = unicode(doc.book_info.about) + url = doc.book_info.url + if not about_link: + if not url: + self.invalid.add(file) + continue + self.unrecognized[file] = url + continue + + m = match_first(about_link, self.about_patterns) + if m: + if m in self.documents: + l = self.duplicates.get(m, []) + l.append(file) + self.duplicates[m] = l + else: + self.documents.add(m) + else: + self.unrecognized[file] = about_link + except Exception: + self.invalid.add(file) + + + + print u"""\ +{0} correct documents, +{1} invalid, +{2} unrecognized, +\t{unrecognized} +{3} duplicate names +\t{duplicates}""".format( + len(self.documents), + len(self.invalid), + len(self.unrecognized), + len(self.duplicates), + duplicates='\n\t'.join(repr(x) for x in self.duplicates.items()), + unrecognized='\n\t'.join(repr(x) for x in self.unrecognized.items()) + ) + + for doc in self.documents: + print u"http://redakcja.wolnelektury.pl/documents/{0}".format('/'.join(doc).lower()) + + +if __name__ == '__main__': + + task = Task() + task.run()