Changeset 19401
- Timestamp:
- 01/10/08 03:46:44 (12 months ago)
- Files:
-
- 1 modified
-
misc/pX/Common/Unicode/Unicode.pm (modified) (10 diffs)
Legend:
- Unmodified
- Added
- Removed
-
misc/pX/Common/Unicode/Unicode.pm
r19386 r19401 141 141 <punct space title upper xdigit word vspace hspace>; 142 142 BEGIN { 143 for @gen_cats, @proplist_cats, @perl_cats -> my Str $cat {144 %category{$cat} = Utable.new;145 }146 143 @mktab_subs.push: my sub mktab_proplist(-->) { 147 144 process_file 'ucd/Proplist.txt', rule { $<cr>=<UCD::code_or_range> ';' $<name>=(\w+) }, { … … 201 198 my Int %ccc; 202 199 # 4 Bidi_Class 203 my Utable $bidi_class .=new;200 my Utable $bidi_class; 204 201 # 5 Decomposition_Mapping 205 202 my Str %compat_decomp; … … 211 208 my Hash of Num %numeric; 212 209 # 9 Bidi_Mirrored 213 my Utable $bidi_mirrored .=new;210 my Utable $bidi_mirrored; 214 211 # 10 Unicode_1_Name 215 212 # is this needed for anything other than name lookups? … … 279 276 } 280 277 %category<word>.add('_'.ord); 278 # most of these aren't listed in UnicodeData.txt. is there other stuff like this? 279 # AC00;<Hangul Syllable, First>;Lo;0;L;;;;;N;;;;; 280 # D7A3;<Hangul Syllable, Last>;Lo;0;L;;;;;N;;;;; 281 %category<Lo>.add(0xAC00..0xD7A3); 282 $bidi_class.add(0xAC00..0xD7A3, :val<L>); 281 283 282 284 dumphash( :%category); … … 327 329 # 1 block name 328 330 # what do we do with block names? 329 my Utable $blockname .=new;331 my Utable $blockname; 330 332 BEGIN { 331 333 @mktab_subs.push: my sub mktab_blocks(-->) { … … 338 340 339 341 # CompositionExclusions.txt 340 # 0 code 341 my Bool %compex; 342 BEGIN { 343 @mktab_subs.push: my sub mktab_compex(-->) { 344 process_file 'ucd/CompositionExclusions.txt', rule { <c=UCD::code> }, { 345 %compex{$<c><str>}++; 346 } 347 dumphash(:%compex); 348 } 349 } 342 # DerivedNormalizationProps.txt has the full list 350 343 351 344 # CaseFolding.txt … … 595 588 596 589 # DerivedNormalizationProps.txt 590 my Utable $compex; 591 my Str %composition; 592 BEGIN { 593 @mktab_subs.push: my sub mktab_compex(-->) { 594 process_file 'ucd/DerivedNormalizationProps.txt', -> my Str $line { 595 if $line ~~ rule { <r=UCD::code_or_range> ';' Full_Composition_Exclusion } { 596 $compex.add($<r><ord>); 597 } 598 } 599 for %canon_decomp.keys -> my Str $s { 600 next if $compex.contains($s.ord); 601 %composition{%canon_decomp{$s}.nfd} = $s; 602 } 603 dumphash(:%composition); 604 } 605 } 597 606 598 607 # GraphemeBreakProperty.txt … … 754 763 sub hangul_decomp(Str $s --> Str) { 755 764 my Int $o = $s.ord; 756 return $s if $o < 0xAC00 or $o >= 0xD7A4; 765 # just returns $s for most $s 766 return $s if $o !~~ 0xAC00..0xD7A3; 757 767 my Str $ret; 758 $ret ~= ((($o - 0xAC00) / 0x2BA4) + 0x1100).chr; 759 $ret ~= ((((($o - 0xAC00) % 0x2BA4) / 28 ) + 0x1161 ).chr; 760 my Int $t = (($o - 0xAC00) % 28 ) + 0x11A7; 768 $o -= 0xAC00; 769 $ret ~= (($o / 0x2BA4) + 0x1100).chr; 770 $ret ~= (((($o % 0x2BA4) / 28 ) + 0x1161 ).chr; 771 my Int $t = ($o % 28 ) + 0x11A7; 761 772 $ret ~= $t.chr if $t != 0x11A7; 762 773 return $ret; … … 782 793 return $new.reorder; 783 794 } 784 our multi method nfc(Str $string: --> Str) is export { 785 ...; 795 sub compose_hangul(Str $s --> Str) { 796 my Str $ret = $s.code_n(0); 797 my Str $prev = $ret; 798 for 1..$s.codes -> my Int $n { 799 my Str $c = $s.code_n($n); 800 if $prev.ord ~~ 0x1100..0x1112 801 and $c.ord ~~ 0x1161..0x1175 { 802 $prev = ((($prev.ord - 0x1100) * 21 + ($c.ord - 0x1161)) * 28) + 0xAC00; 803 $ret.code_n($ret.codes-1) = $prev; 804 next; 805 } 806 if $prev.ord ~~ 0xAC00..0xD7A3 807 and ($prev.ord - 0xAC00) % 28 == 0 808 and $c.ord ~~ 0x11A7..0x11C2 { 809 $prev = ($prev.ord + $c.ord - 0x11A7).chr; 810 $ret.code_n($ret.codes-1) = $prev; 811 next; 812 } 813 } 814 $prev = $c; 815 $ret ~= $c; 816 } 817 return $ret; 818 } 819 sub compose_graph(Str $s --> Str) { 820 return %composition{$s} // $s 821 if $s.codes == 1; 822 return compose_hangul($s) if $s ~~ &isGCBHangulSyllable; 823 my Str $ret = $s; 824 startover: 825 if exists %composition{$s.code_n(0)} { 826 $ret = %composition{$s.code_n(0)} ~ $s.substr($s.as_codes[1]); 827 goto startover; 828 } 829 for 1..$ret.codes -> my Int $n { 830 if $ret.code_n(0) ~ $ret.code_n($n) eq any %composition.keys { 831 my Str $new = %composition{$ret.code_n(0) ~ $ret.code_n($n)}; 832 for 1..$ret.codes -> my Int $m { 833 next if $n == $m; 834 $new ~= $ret.code_n($m); 835 } 836 $ret = $new; 837 goto startover; 838 } 839 } 840 return $ret; 786 841 } 787 842 our multi method nfkd(Str $string: --> Str) is export { … … 799 854 return $new.reorder; 800 855 } 856 our multi method nfc(Str $string: --> Str) is export { 857 my Str $ret; 858 for ^$string.nfd.graphs -> my Int $n { 859 $ret ~= compose_graph($string.graph_n($n)); 860 861 } 862 return $ret; 863 } 801 864 our multi method nfkc(Str $string: --> Str) is export { 802 ...; 865 my Str $ret; 866 for ^$string.nfkd.graphs -> my Int $n { 867 $ret ~= compose_graph($string.graph_n($n)); 868 869 } 870 return $ret; 803 871 } 804 872
