From: Marcin Koziej Date: Mon, 4 Nov 2013 16:12:54 +0000 (+0100) Subject: Accept dates in like "2 poł. XIX w." X-Git-Tag: 1.7~125^2 X-Git-Url: https://git.mdrn.pl/librarian.git/commitdiff_plain/00906f2fc0432cede204c7870e6caecf427d7024?ds=inline;hp=--cc Accept dates in like "2 poł. XIX w." --- 00906f2fc0432cede204c7870e6caecf427d7024 diff --git a/librarian/dcparser.py b/librarian/dcparser.py index d99aaf0..fff8ac2 100644 --- a/librarian/dcparser.py +++ b/librarian/dcparser.py @@ -6,6 +6,8 @@ from xml.parsers.expat import ExpatError from datetime import date import time +import re +from librarian.util import roman_to_int from librarian import (ValidationError, NoDublinCore, ParseError, DCNS, RDFNS, WLURI) @@ -62,10 +64,17 @@ class Person(object): def as_date(text): try: - try: - t = time.strptime(text, '%Y-%m-%d') - except ValueError: - t = time.strptime(text, '%Y') + # check out the "N. poł X w." syntax + m = re.match(u"([12]) *poł[.]? ([MCDXVI]+) .*[.]?", text) + if m: + half = int(m.groups()[0]) + century = roman_to_int(str(m.groups()[1])) + t = ((century*100 + (half-1)*50), 1, 1) + else: + try: + t = time.strptime(text, '%Y-%m-%d') + except ValueError: + t = time.strptime(text, '%Y') return date(t[0], t[1], t[2]) except ValueError, e: raise ValueError("Unrecognized date format. Try YYYY-MM-DD or YYYY.") diff --git a/librarian/util.py b/librarian/util.py new file mode 100644 index 0000000..396c84b --- /dev/null +++ b/librarian/util.py @@ -0,0 +1,117 @@ +# Functions to convert between integers and Roman numerals. Doctest examples included. +# by Paul Winkler +# http://code.activestate.com/recipes/81611-roman-numerals/ +# PSFL (GPL compatible) + +def int_to_roman(input): + """ + Convert an integer to Roman numerals. 
+ + Examples: + >>> int_to_roman(0) + Traceback (most recent call last): + ValueError: Argument must be between 1 and 3999 + + >>> int_to_roman(-1) + Traceback (most recent call last): + ValueError: Argument must be between 1 and 3999 + + >>> int_to_roman(1.5) + Traceback (most recent call last): + TypeError: expected integer, got <type 'float'> + + >>> for i in range(1, 21): print int_to_roman(i) + ... + I + II + III + IV + V + VI + VII + VIII + IX + X + XI + XII + XIII + XIV + XV + XVI + XVII + XVIII + XIX + XX + >>> print int_to_roman(2000) + MM + >>> print int_to_roman(1999) + MCMXCIX + """ + if type(input) != type(1): + raise TypeError, "expected integer, got %s" % type(input) + if not 0 < input < 4000: + raise ValueError, "Argument must be between 1 and 3999" + ints = (1000, 900, 500, 400, 100, 90, 50, 40, 10, 9, 5, 4, 1) + nums = ('M', 'CM', 'D', 'CD','C', 'XC','L','XL','X','IX','V','IV','I') + result = "" + for i in range(len(ints)): + count = int(input / ints[i]) + result += nums[i] * count + input -= ints[i] * count + return result + +def roman_to_int(input): + """ + Convert a roman numeral to an integer. + + >>> r = range(1, 4000) + >>> nums = [int_to_roman(i) for i in r] + >>> ints = [roman_to_int(n) for n in nums] + >>> print r == ints + 1 + + >>> roman_to_int('VVVIV') + Traceback (most recent call last): + ... + ValueError: input is not a valid roman numeral: VVVIV + >>> roman_to_int(1) + Traceback (most recent call last): + ... + TypeError: expected string, got <type 'int'> + >>> roman_to_int('a') + Traceback (most recent call last): + ... + ValueError: input is not a valid roman numeral: A + >>> roman_to_int('IL') + Traceback (most recent call last): + ... 
+ ValueError: input is not a valid roman numeral: IL + """ + if type(input) != type(""): + raise TypeError, "expected string, got %s" % type(input) + input = input.upper() + nums = ['M', 'D', 'C', 'L', 'X', 'V', 'I'] + ints = [1000, 500, 100, 50, 10, 5, 1] + places = [] + for c in input: + if not c in nums: + raise ValueError, "input is not a valid roman numeral: %s" % input + for i in range(len(input)): + c = input[i] + value = ints[nums.index(c)] + # If the next place holds a larger number, this value is negative. + try: + nextvalue = ints[nums.index(input[i +1])] + if nextvalue > value: + value *= -1 + except IndexError: + # there is no next place. + pass + places.append(value) + sum = 0 + for n in places: sum += n + # Easiest test for validity... + if int_to_roman(sum) == input: + return sum + else: + raise ValueError, 'input is not a valid roman numeral: %s' % input