use strict; use warnings; use Unicode::Normalize; print <) { my @c = split /;/, $_; # Find characters which canonically decompose (without any # compatibility tag "") next unless $c[5] and $c[5] !~ /^[ord($1)-ord('a')]/eg; $_ } split / /, $cs2; # If NFC didn't collapse everything into single characters, this string is not interesting next if grep length != 1, @x; # If the string doesn't NFC into the desired character, it's not interesting next unless Unicode::Normalize::NFC(join '', @x) eq chr hex $c[0]; # This string is good push @data, [hex $c[0], map ord, @x]; } } } elsif (@norm == 4) { my ($a, $b, $c, $d) = @norm; for my $cs (permut([$a, $b, $c, $d], [])) { for my $cs2 ('ab c d', 'a bc d', 'a b cd', 'ab cd', 'abc d', 'a bcd') { my @x = map Unicode::Normalize::NFC($_), map { s/(.)/$cs->[ord($1)-ord('a')]/eg; $_ } split / /, $cs2; next if grep length != 1, @x; next unless Unicode::Normalize::NFC(join '', @x) eq chr hex $c[0]; push @data, [hex $c[0], map ord, @x]; } } } elsif (@norm > 4) { die "\@norm too big"; } } print uniq(map "[".join(',', @$_)."],\n", @data); print <