X-Git-Url: https://git.mdrn.pl/librarian.git/blobdiff_plain/e316fc14bef26f958937aec0e6854b61f71a3b34..09dded3d8606e8e4406fffcf477ceb4a1c97fee2:/librarian/font-optimizer/Font/Subsetter/create-data.pl diff --git a/librarian/font-optimizer/Font/Subsetter/create-data.pl b/librarian/font-optimizer/Font/Subsetter/create-data.pl new file mode 100644 index 0000000..e0a38fb --- /dev/null +++ b/librarian/font-optimizer/Font/Subsetter/create-data.pl @@ -0,0 +1,100 @@ +use strict; +use warnings; + +use Unicode::Normalize; + +print <) { + my @c = split /;/, $_; + # Find characters which canonically decompose (without any + # compatibility tag "") + next unless $c[5] and $c[5] !~ /^[ord($1)-ord('a')]/eg; $_ } split / /, $cs2; + # If NFC didn't collapse everything into single characters, this string is not interesting + next if grep length != 1, @x; + # If the string doesn't NFC into the desired character, it's not interesting + next unless Unicode::Normalize::NFC(join '', @x) eq chr hex $c[0]; + # This string is good + push @data, [hex $c[0], map ord, @x]; + } + } + } elsif (@norm == 4) { + my ($a, $b, $c, $d) = @norm; + for my $cs (permut([$a, $b, $c, $d], [])) { + for my $cs2 ('ab c d', 'a bc d', 'a b cd', 'ab cd', 'abc d', 'a bcd') { + my @x = map Unicode::Normalize::NFC($_), map { s/(.)/$cs->[ord($1)-ord('a')]/eg; $_ } split / /, $cs2; + next if grep length != 1, @x; + next unless Unicode::Normalize::NFC(join '', @x) eq chr hex $c[0]; + push @data, [hex $c[0], map ord, @x]; + } + } + } elsif (@norm > 4) { + die "\@norm too big"; + } +} + +print uniq(map "[".join(',', @$_)."],\n", @data); + +print <