1 package Font::TTF::Cmap;
5 Font::TTF::Cmap - Character map table
9 Looks after the character map. For ease of use, the actual cmap is held in
10 a hash against codepoint. Thus for a given table:
12 $gid = $font->{'cmap'}{'Tables'}[0]{'val'}{$code};
14 Note that C<$code> should be a true value (0x1234) rather than a string representation.
16 =head1 INSTANCE VARIABLES
18 The instance variables listed here are not preceeded by a space due to their
19 emulating structural information in the font.
25 Number of subtables in this table
29 An array of subtables ([0..Num-1])
33 Each subtables also has its own instance variables which are, again, not
40 The platform number for this subtable
44 The encoding number for this subtable
48 Gives the stored format of this subtable
52 Gives the version (or language) information for this subtable
56 A hash keyed by the codepoint value (not a string) storing the glyph id
69 @ISA = qw(Font::TTF::Table);
74 Reads the cmap into memory. Format 4 subtables read the whole subtable and
75 fill in the segmented array accordingly.
82 my ($dat, $i, $j, $k, $id, @ids, $s);
83 my ($start, $end, $range, $delta, $form, $len, $num, $ver, $sg);
84 my ($fh) = $self->{' INFILE'};
86 $self->SUPER::read or return $self;
88 $self->{'Num'} = unpack("x2n", $dat);
89 $self->{'Tables'} = [];
90 for ($i = 0; $i < $self->{'Num'}; $i++)
94 ($s->{'Platform'}, $s->{'Encoding'}, $s->{'LOC'}) = (unpack("nnN", $dat));
95 $s->{'LOC'} += $self->{' OFFSET'};
96 push(@{$self->{'Tables'}}, $s);
98 for ($i = 0; $i < $self->{'Num'}; $i++)
100 $s = $self->{'Tables'}[$i];
101 $fh->seek($s->{'LOC'}, 0);
103 $form = unpack("n", $dat);
105 $s->{'Format'} = $form;
111 ($len, $s->{'Ver'}) = unpack('n2', $dat);
112 $fh->read($dat, 256);
113 $s->{'val'} = {map {$j++; ($_ ? ($j - 1, $_) : ())} unpack("C*", $dat)};
116 my ($start, $ecount);
119 ($len, $s->{'Ver'}, $start, $ecount) = unpack('n4', $dat);
120 $fh->read($dat, $ecount << 1);
121 $s->{'val'} = {map {$start++; ($_ ? ($start - 1, $_) : ())} unpack("n*", $dat)};
122 } elsif ($form == 2) # Contributed by Huw Rogers
125 ($len, $s->{'Ver'}) = unpack('n2', $dat);
126 $fh->read($dat, 512);
127 my ($j, $k, $l, $m, $n, @subHeaderKeys, @subHeaders, $subHeader);
129 for ($j = 0; $j < 256; $j++) {
130 my $k = unpack('@'.($j<<1).'n', $dat)>>3;
131 $n = $k + 1 if $k >= $n;
132 $subHeaders[$subHeaderKeys[$j] = $k] ||= [ ];
134 $fh->read($dat, $n<<3); # read subHeaders[]
135 for ($k = 0; $k < $n; $k++) {
136 $subHeader = $subHeaders[$k];
138 @$subHeader = unpack('@'.$l.'n4', $dat);
139 $subHeader->[2] = unpack('s', pack('S', $subHeader->[2]))
140 if $subHeader->[2] & 0x8000; # idDelta
142 ($subHeader->[3] - (($n - $k)<<3) + 6)>>1; # idRangeOffset
144 $fh->read($dat, $len - ($n<<3) - 518); # glyphIndexArray[]
145 for ($j = 0; $j < 256; $j++) {
146 $k = $subHeaderKeys[$j];
147 $subHeader = $subHeaders[$k];
149 $l = $j - $subHeader->[0];
150 if ($l >= 0 && $l < $subHeader->[1]) {
151 $m = unpack('@'.(($l + $subHeader->[3])<<1).'n', $dat);
152 $m += $subHeader->[2] if $m;
153 $s->{'val'}{$j} = $m;
156 for ($l = 0; $l < $subHeader->[1]; $l++) {
157 $m = unpack('@'.(($l + $subHeader->[3])<<1).'n', $dat);
158 $m += $subHeader->[2] if $m;
159 $s->{'val'}{($j<<8) + $l + $subHeader->[0]} = $m;
166 ($len, $s->{'Ver'}, $num) = unpack('n3', $dat);
168 $fh->read($dat, $len - 14);
169 for ($j = 0; $j < $num; $j++)
171 $end = unpack("n", substr($dat, $j << 1, 2));
172 $start = unpack("n", substr($dat, ($j << 1) + ($num << 1) + 2, 2));
173 $delta = unpack("n", substr($dat, ($j << 1) + ($num << 2) + 2, 2));
174 $delta -= 65536 if $delta > 32767;
175 $range = unpack("n", substr($dat, ($j << 1) + $num * 6 + 2, 2));
176 for ($k = $start; $k <= $end; $k++)
178 if ($range == 0 || $range == 65535) # support the buggy FOG with its range=65535 for final segment
179 { $id = $k + $delta; }
181 { $id = unpack("n", substr($dat, ($j << 1) + $num * 6 +
182 2 + ($k - $start) * 2 + $range, 2)) + $delta; }
183 $id -= 65536 if $id >= 65536;
184 $s->{'val'}{$k} = $id if ($id);
187 } elsif ($form == 8 || $form == 12)
190 ($len, $s->{'Ver'}) = unpack('x2N2', $dat);
193 $fh->read($dat, 8196);
194 $num = unpack("N", substr($dat, 8192, 4)); # don't need the map
198 $num = unpack("N", $dat);
200 $fh->read($dat, 12 * $num);
201 for ($j = 0; $j < $num; $j++)
203 ($start, $end, $sg) = unpack("N3", substr($dat, $j * 12, 12));
204 for ($k = $start; $k <= $end; $k++)
205 { $s->{'val'}{$k} = $sg++; }
207 } elsif ($form == 10)
210 ($len, $s->{'Ver'}, $start, $num) = unpack('x2N4', $dat);
211 $fh->read($dat, $num << 1);
212 for ($j = 0; $j < $num; $j++)
213 { $s->{'val'}{$start + $j} = unpack("n", substr($dat, $j << 1, 2)); }
220 =head2 $t->ms_lookup($uni)
222 Finds a Unicode table, giving preference to the MS one, and looks up the given
223 Unicode codepoint in it to find the glyph id.
229 my ($self, $uni) = @_;
231 $self->find_ms || return undef unless (defined $self->{' mstable'});
232 return $self->{' mstable'}{'val'}{$uni};
238 Finds the a Unicode table, giving preference to the Microsoft one, and sets the C<mstable> instance variable
239 to it if found. Returns the table it finds.
245 my ($i, $s, $alt, $found);
247 return $self->{' mstable'} if defined $self->{' mstable'};
249 for ($i = 0; $i < $self->{'Num'}; $i++)
251 $s = $self->{'Tables'}[$i];
252 if ($s->{'Platform'} == 3)
254 $self->{' mstable'} = $s;
255 last if ($s->{'Encoding'} == 10);
256 $found = 1 if ($s->{'Encoding'} == 1);
257 } elsif ($s->{'Platform'} == 0 || ($s->{'Platform'} == 2 && $s->{'Encoding'} == 1))
260 $self->{' mstable'} = $alt if ($alt && !$found);
267 Returns the encoding of the microsoft table (0 => symbol, etc.). Returns undef if there is
277 return $self->{' mstable'}{'Encoding'}
278 if (defined $self->{' mstable'} && $self->{' mstable'}{'Platform'} == 3);
280 foreach $s (@{$self->{'Tables'}})
282 return $s->{'Encoding'} if ($s->{'Platform'} == 3);
290 Writes out a cmap table to a filehandle. If it has not been read, then
291 just copies from input file to output
297 my ($self, $fh) = @_;
298 my ($loc, $s, $i, $base_loc, $j, @keys);
300 return $self->SUPER::out($fh) unless $self->{' read'};
303 $self->{'Tables'} = [sort {$a->{'Platform'} <=> $b->{'Platform'}
304 || $a->{'Encoding'} <=> $b->{'Encoding'}
305 || $a->{'Ver'} <=> $b->{'Ver'}} @{$self->{'Tables'}}];
306 $self->{'Num'} = scalar @{$self->{'Tables'}};
308 $base_loc = $fh->tell();
309 $fh->print(pack("n2", 0, $self->{'Num'}));
311 for ($i = 0; $i < $self->{'Num'}; $i++)
312 { $fh->print(pack("nnN", $self->{'Tables'}[$i]{'Platform'}, $self->{'Tables'}[$i]{'Encoding'}, 0)); }
314 for ($i = 0; $i < $self->{'Num'}; $i++)
316 $s = $self->{'Tables'}[$i];
317 if ($s->{'Format'} < 8)
318 { @keys = sort {$a <=> $b} grep { $_ <= 0xFFFF} keys %{$s->{'val'}}; }
320 { @keys = sort {$a <=> $b} keys %{$s->{'val'}}; }
321 $s->{' outloc'} = $fh->tell();
322 if ($s->{'Format'} < 8)
323 { $fh->print(pack("n3", $s->{'Format'}, 0, $s->{'Ver'})); } # come back for length
325 { $fh->print(pack("n2N2", $s->{'Format'}, 0, 0, $s->{'Ver'})); }
327 if ($s->{'Format'} == 0)
329 $fh->print(pack("C256", @{$s->{'val'}}{0 .. 255}));
330 } elsif ($s->{'Format'} == 6)
332 $fh->print(pack("n2", $keys[0], $keys[-1] - $keys[0] + 1));
333 $fh->print(pack("n*", @{$s->{'val'}}{$keys[0] .. $keys[-1]}));
334 } elsif ($s->{'Format'} == 2) # Contributed by Huw Rogers
336 my ($g, $k, $h, $l, $m, $n);
337 my (@subHeaderKeys, @subHeaders, $subHeader, @glyphIndexArray);
339 @subHeaderKeys = (-1) x 256;
341 next unless defined($g = $s->{'val'}{$j});
344 if (($k = $subHeaderKeys[$h]) < 0) {
345 $subHeader = [ $l, 1, 0, 0, [ $g ] ];
346 $subHeaders[$k = $n++] = $subHeader;
347 $subHeaderKeys[$h] = $k;
349 $subHeader = $subHeaders[$k];
350 $m = ($l - $subHeader->[0] + 1) - $subHeader->[1];
351 $subHeader->[1] += $m;
352 push @{$subHeader->[4]}, (0) x ($m - 1), $g - $subHeader->[2];
355 @subHeaderKeys = map { $_ < 0 ? 0 : $_ } @subHeaderKeys;
356 $subHeader = $subHeaders[0];
358 push @glyphIndexArray, @{$subHeader->[4]};
359 splice(@$subHeader, 4);
361 my @subHeaders_ = sort {@{$a->[4]} <=> @{$b->[4]}} @subHeaders[1..$#subHeaders];
362 my ($f, $d, $r, $subHeader_);
363 for ($k = 0; $k < @subHeaders_; $k++) {
364 $subHeader = $subHeaders_[$k];
365 $f = $r = shift @{$subHeader->[4]};
366 $subHeader->[5] = join(':',
371 sprintf('-%04x', -$d) :
373 } @{$subHeader->[4]});
374 unshift @{$subHeader->[4]}, $f;
376 for ($k = 0; $k < @subHeaders_; $k++) {
377 $subHeader = $subHeaders_[$k];
378 next unless $subHeader->[4];
379 $subHeader->[3] = @glyphIndexArray;
380 push @glyphIndexArray, @{$subHeader->[4]};
381 for ($l = $k + 1; $l < @subHeaders_; $l++) {
382 $subHeader_ = $subHeaders_[$l];
383 next unless $subHeader_->[4];
384 $d = $subHeader_->[5];
385 if ($subHeader->[5] =~ /\Q$d\E/) {
386 my $o = length($`)/6; #`
388 $subHeader_->[4]->[$o] - $subHeader->[4]->[0];
389 $subHeader_->[3] = $subHeader->[3] + $o;
390 splice(@$subHeader_, 4);
393 splice(@$subHeader, 4);
396 $fh->print(pack('n*', map { $_<<3 } @subHeaderKeys));
397 for ($j = 0; $j < 256; $j++) {
398 $k = $subHeaderKeys[$j];
399 $subHeader = $subHeaders[$k];
401 for ($k = 0; $k < $n; $k++) {
402 $subHeader = $subHeaders[$k];
403 $fh->print(pack('n4',
406 $subHeader->[2] < 0 ?
407 unpack('S', pack('s', $subHeader->[2])) :
409 ($subHeader->[3]<<1) + (($n - $k)<<3) - 6
412 $fh->print(pack('n*', @glyphIndexArray));
413 } elsif ($s->{'Format'} == 4)
415 my ($num, $sRange, $eSel, $eShift, @starts, @ends, $doff);
416 my (@deltas, $delta, @range, $flat, $k, $segs, $count, $newseg, $v);
418 push(@keys, 0xFFFF) unless ($keys[-1] == 0xFFFF);
419 $newseg = 1; $num = 0;
420 for ($j = 0; $j <= $#keys && $keys[$j] <= 0xFFFF; $j++)
422 $v = $s->{'val'}{$keys[$j]} || 0;
428 push(@starts, $keys[$j]);
431 $delta = 0 if ($delta + $j - $doff != $v);
432 $flat = 0 if ($v == 0);
433 if ($j == $#keys || $keys[$j] + 1 != $keys[$j+1])
435 push (@ends, $keys[$j]);
436 push (@deltas, $delta ? $delta - $keys[$doff] : 0);
437 push (@range, $flat);
443 ($num, $sRange, $eSel, $eShift) = Font::TTF::Utils::TTF_bininfo($num, 2);
444 $fh->print(pack("n4", $num * 2, $sRange, $eSel, $eShift));
445 $fh->print(pack("n*", @ends));
446 $fh->print(pack("n", 0));
447 $fh->print(pack("n*", @starts));
448 $fh->print(pack("n*", @deltas));
451 for ($j = 0; $j < $num; $j++)
453 $delta = $deltas[$j];
454 if ($delta != 0 && $range[$j] == 1)
458 $range[$j] = ($count + $num - $j) << 1;
459 $count += $ends[$j] - $starts[$j] + 1;
463 $fh->print(pack("n*", @range));
465 for ($j = 0; $j < $num; $j++)
467 next if ($range[$j] == 0);
468 $fh->print(pack("n*", map {$_ || 0} @{$s->{'val'}}{$starts[$j] .. $ends[$j]}));
470 } elsif ($s->{'Format'} == 8 || $s->{'Format'} == 12)
472 my (@jobs, $start, $current, $curr_glyf, $map);
474 $current = 0; $curr_glyf = 0;
475 $map = "\000" x 8192;
480 if (defined $s->{'val'}{$j >> 16})
481 { $s->{'Format'} = 12; }
482 vec($map, $j >> 16, 1) = 1;
484 if ($j != $current + 1 || $s->{'val'}{$j} != $curr_glyf + 1)
486 push (@jobs, [$start, $current, $curr_glyf - ($current - $start)]) if (defined $start);
487 $start = $j; $current = $j; $curr_glyf = $s->{'val'}{$j};
490 $curr_glyf = $s->{'val'}{$j};
492 push (@jobs, [$start, $current, $curr_glyf - ($current - $start)]) if (defined $start);
493 $fh->print($map) if ($s->{'Format'} == 8);
494 $fh->print(pack('N', $#jobs + 1));
496 { $fh->print(pack('N3', @{$j})); }
497 } elsif ($s->{'Format'} == 10)
499 $fh->print(pack('N2', $keys[0], $keys[-1] - $keys[0] + 1));
500 $fh->print(pack('n*', $s->{'val'}{$keys[0] .. $keys[-1]}));
504 if ($s->{'Format'} < 8)
506 $fh->seek($s->{' outloc'} + 2, 0);
507 $fh->print(pack("n", $loc - $s->{' outloc'}));
510 $fh->seek($s->{' outloc'} + 4, 0);
511 $fh->print(pack("N", $loc - $s->{' outloc'}));
513 $fh->seek($base_loc + 8 + ($i << 3), 0);
514 $fh->print(pack("N", $s->{' outloc'} - $base_loc));
521 =head2 $t->XML_element($context, $depth, $name, $val)
523 Outputs the elements of the cmap in XML. We only need to process val here
529 my ($self, $context, $depth, $k, $val) = @_;
530 my ($fh) = $context->{'fh'};
533 return $self if ($k eq 'LOC');
534 return $self->SUPER::XML_element($context, $depth, $k, $val) unless ($k eq 'val');
536 $fh->print("$depth<mappings>\n");
537 foreach $i (sort {$a <=> $b} keys %{$val})
538 { $fh->printf("%s<map code='%04X' glyph='%s'/>\n", $depth . $context->{'indent'}, $i, $val->{$i}); }
539 $fh->print("$depth</mappings>\n");
543 =head2 @map = $t->reverse(%opt)
545 Returns a reverse map of the Unicode cmap. I.e. given a glyph gives the Unicode value for it. Options are:
551 Table number to use rather than the default Unicode table
555 Returns each element of reverse as an array since a glyph may be mapped by more
556 than one Unicode value. The arrays are unsorted. Otherwise store any one unicode value for a glyph.
564 my ($self, %opt) = @_;
565 my ($table) = defined $opt{'tnum'} ? $self->{'Tables'}[$opt{'tnum'}] : $self->find_ms;
566 my (@res, $code, $gid);
568 while (($code, $gid) = each(%{$table->{'val'}}))
571 { push (@{$res[$gid]}, $code); }
573 { $res[$gid] = $code unless (defined $res[$gid] && $res[$gid] > 0 && $res[$gid] < $code); }
579 =head2 is_unicode($index)
581 Returns whether the table of a given index is known to be a unicode table
582 (as specified in the specifications)
588 my ($self, $index) = @_;
589 my ($pid, $eid) = ($self->{'Tables'}[$index]{'Platform'}, $self->{'Tables'}[$index]{'Encoding'});
591 return ($pid == 3 || $pid == 0 || ($pid == 2 && $eid == 1));
602 No support for format 2 tables (MBCS)
608 Martin Hosken Martin_Hosken@sil.org. See L<Font::TTF::Font> for copyright and