4 use Unicode::Normalize;
7 package Font::Subsetter::NormalizationData;
15 # where the codepoint x is the NFC normalization of the string a,b,c
19 open my $f, '/usr/lib/perl5/5.8.8/unicore/UnicodeData.txt' or die $!;
21 my @c = split /;/, $_;
22 # Find characters which canonically decompose (without any
23 # compatibility tag "<foo>")
24 next unless $c[5] and $c[5] !~ /^</;
27 # Print the character and its maximally-decomposed codepoints,
28 # if they re-compose into it
29 my @x = split //, Unicode::Normalize::NFD(chr hex $c[0]);
30 push @data, [hex $c[0], map ord, @x]
31 if Unicode::Normalize::NFC(join '', @x) eq chr hex $c[0];
34 # Try to find all other strings that can become this character under NFC:
35 # If the maximal decomposition is "abc", we might want the strings
36 # "NFC(ab) c", "NFC(ac) b", etc, so attempt all permutations of abc
37 # and then try all groupings to apply NFC to
39 my @norm = split //, Unicode::Normalize::NFD(chr hex $c[0]);
41 my ($a, $b, $c) = @norm;
42 for my $cs (permut([$a, $b, $c], [])) { # all permutations
43 for my $cs2 ('ab c', 'a bc') { # all groupings
44 my @x = map Unicode::Normalize::NFC($_), map { s/(.)/$cs->[ord($1)-ord('a')]/eg; $_ } split / /, $cs2;
45 # If NFC didn't collapse everything into single characters, this string is not interesting
46 next if grep length != 1, @x;
47 # If the string doesn't NFC into the desired character, it's not interesting
48 next unless Unicode::Normalize::NFC(join '', @x) eq chr hex $c[0];
50 push @data, [hex $c[0], map ord, @x];
53 } elsif (@norm == 4) {
54 my ($a, $b, $c, $d) = @norm;
55 for my $cs (permut([$a, $b, $c, $d], [])) {
56 for my $cs2 ('ab c d', 'a bc d', 'a b cd', 'ab cd', 'abc d', 'a bcd') {
57 my @x = map Unicode::Normalize::NFC($_), map { s/(.)/$cs->[ord($1)-ord('a')]/eg; $_ } split / /, $cs2;
58 next if grep length != 1, @x;
59 next unless Unicode::Normalize::NFC(join '', @x) eq chr hex $c[0];
60 push @data, [hex $c[0], map ord, @x];
68 print uniq(map "[".join(',', @$_)."],\n", @data);
78 my @head = @{ $_[0] };
79 my @tail = @{ $_[1] };
83 for my $i (0 .. $#head) {
86 unshift(@newtail, splice(@newhead, $i, 1));
87 push @r, permut([@newhead], [@newtail]);
97 push @u, $_ unless $u{$_}++;