+++ /dev/null
-use strict;
-use warnings;
-
-use Unicode::Normalize;
-
-print <<EOF;
-package Font::Subsetter::NormalizationData;
-use strict;
-use warnings;
-our \@data = (
-EOF
-
-# Output is:
-# [x, a,b,c...],
-# where the codepoint x is the NFC normalization of the string a,b,c
-
-my @data;
-
-open my $f, '/usr/lib/perl5/5.8.8/unicore/UnicodeData.txt' or die $!;
-while (<$f>) {
- my @c = split /;/, $_;
- # Find characters which canonically decompose (without any
- # compatibility tag "<foo>")
- next unless $c[5] and $c[5] !~ /^</;
-
- {
- # Print the character and its maximally-decomposed codepoints,
- # if they re-compose into it
- my @x = split //, Unicode::Normalize::NFD(chr hex $c[0]);
- push @data, [hex $c[0], map ord, @x]
- if Unicode::Normalize::NFC(join '', @x) eq chr hex $c[0];
- }
-
- # Try to find all other strings that can become this character under NFC:
- # If the maximal decomposition is "abc", we might want the strings
- # "NFC(ab) c", "NFC(ac) b", etc, so attempt all permutations of abc
- # and then try all groupings to apply NFC to
-
- my @norm = split //, Unicode::Normalize::NFD(chr hex $c[0]);
- if (@norm == 3) {
- my ($a, $b, $c) = @norm;
- for my $cs (permut([$a, $b, $c], [])) { # all permutations
- for my $cs2 ('ab c', 'a bc') { # all groupings
- my @x = map Unicode::Normalize::NFC($_), map { s/(.)/$cs->[ord($1)-ord('a')]/eg; $_ } split / /, $cs2;
- # If NFC didn't collapse everything into single characters, this string is not interesting
- next if grep length != 1, @x;
- # If the string doesn't NFC into the desired character, it's not interesting
- next unless Unicode::Normalize::NFC(join '', @x) eq chr hex $c[0];
- # This string is good
- push @data, [hex $c[0], map ord, @x];
- }
- }
- } elsif (@norm == 4) {
- my ($a, $b, $c, $d) = @norm;
- for my $cs (permut([$a, $b, $c, $d], [])) {
- for my $cs2 ('ab c d', 'a bc d', 'a b cd', 'ab cd', 'abc d', 'a bcd') {
- my @x = map Unicode::Normalize::NFC($_), map { s/(.)/$cs->[ord($1)-ord('a')]/eg; $_ } split / /, $cs2;
- next if grep length != 1, @x;
- next unless Unicode::Normalize::NFC(join '', @x) eq chr hex $c[0];
- push @data, [hex $c[0], map ord, @x];
- }
- }
- } elsif (@norm > 4) {
- die "\@norm too big";
- }
-}
-
-print uniq(map "[".join(',', @$_)."],\n", @data);
-
-print <<EOF;
-);
-
-1;
-EOF
-
-sub permut {
- my @r;
- my @head = @{ $_[0] };
- my @tail = @{ $_[1] };
- unless (@head) {
- push @r, \@tail;
- } else {
- for my $i (0 .. $#head) {
- my @newhead = @head;
- my @newtail = @tail;
- unshift(@newtail, splice(@newhead, $i, 1));
- push @r, permut([@newhead], [@newtail]);
- }
- }
- return @r;
-}
-
-sub uniq {
- my @u;
- my %u;
- for (@_) {
- push @u, $_ unless $u{$_}++;
- }
- @u;
-}