#
#   Copyright (C) 2002-2009, International Business Machines Corporation and others.
#       All Rights Reserved.
#
#   file:  char.txt
#
#   ICU Character Break Rules, also known as Grapheme Cluster Boundaries
#      See Unicode Standard Annex #29.
#      These rules are based on TR29 Revision 13, for Unicode Version 5.1
#

#
#  Character Class Definitions.
#
#  These sets map one-to-one onto values of the Unicode
#  Grapheme_Cluster_Break property.
#
$CR          = [\p{Grapheme_Cluster_Break = CR}];
$LF          = [\p{Grapheme_Cluster_Break = LF}];
$Control     = [\p{Grapheme_Cluster_Break = Control}];
$Prepend     = [\p{Grapheme_Cluster_Break = Prepend}];
$Extend      = [\p{Grapheme_Cluster_Break = Extend}];
$SpacingMark = [\p{Grapheme_Cluster_Break = SpacingMark}];

#
#  Indic Script Definitions.
#
#  For each script below there is a set of letters, written as explicit
#  code point ranges, and the single code point of that script's virama sign.
#  They are referenced only by the letter/virama rules in the !!forward and
#  !!reverse sections.
#
$BengaliLetter = [\u0985-\u09B9 \u09CE \u09DC-\u09E1 \u09F0-\u09F1];
$BengaliSignVirama = \u09CD;
$GujaratiLetter = [\u0A85-\u0A8C \u0A8F-\u0A90 \u0A93-\u0AB9 \u0AE0-\u0AE1];
$GujaratiSignVirama = \u0ACD;
$DevanagariLetter = [\u0904-\u0939 \u0958-\u0961 \u0972-\u097F];
$DevanagariSignVirama = \u094D;
$KannadaLetter = [\u0C85-\u0CB9 \u0CDE-\u0CE1];
$KannadaSignVirama = \u0CCD;
$MalayalamLetter = [\u0D05-\u0D39 \u0D60-\u0D61 \u0D7A-\u0D7F];
$MalayalamSignVirama = \u0D4D;
$OriyaLetter = [\u0B05-\u0B39 \u0B5C-\u0B61 \u0B71];
$OriyaSignVirama = \u0B4D;
$GurmukhiLetter = [\u0A05-\u0A39 \u0A59-\u0A5E];
$GurmukhiSignVirama = \u0A4D;
$TamilLetter = [\u0B85-\u0BB9];
$TamilSignVirama = \u0BCD;
$TeluguLetter = [\u0C05-\u0C39 \u0C58-\u0C61];
$TeluguSignVirama = \u0C4D;

#
# Korean Syllable Definitions
#
#   Hangul syllable types, again taken from Grapheme_Cluster_Break.
#
$L       = [\p{Grapheme_Cluster_Break = L}];
$V       = [\p{Grapheme_Cluster_Break = V}];
$T       = [\p{Grapheme_Cluster_Break = T}];

$LV      = [\p{Grapheme_Cluster_Break = LV}];
$LVT     = [\p{Grapheme_Cluster_Break = LVT}];


## -------------------------------------------------
#
#  Rule chaining is switched on for the whole file: a match of one rule may
#  continue into another rule that starts on the character the first ended on.
#
!!chain;

#
#  Forward rules. Each rule describes a sequence that must NOT be split.
#
!!forward;

# UAX #29 GB3: keep CR LF together.
$CR $LF;

# Indic conjuncts (not part of UAX #29): a letter followed by one or more
# groups of <virama, optional letter> of the same script stays in one cluster.
$BengaliLetter ($BengaliSignVirama $BengaliLetter?)+;
$GujaratiLetter ($GujaratiSignVirama $GujaratiLetter?)+;
$DevanagariLetter ($DevanagariSignVirama $DevanagariLetter?)+;
$KannadaLetter ($KannadaSignVirama $KannadaLetter?)+;
$MalayalamLetter ($MalayalamSignVirama $MalayalamLetter?)+;
$OriyaLetter ($OriyaSignVirama $OriyaLetter?)+;
$GurmukhiLetter ($GurmukhiSignVirama $GurmukhiLetter?)+;
$TamilLetter ($TamilSignVirama $TamilLetter?)+;
$TeluguLetter ($TeluguSignVirama $TeluguLetter?)+;

# UAX #29 GB6, GB7, GB8: do not break inside Hangul syllable sequences.
$L ($L | $V | $LV | $LVT);
($LV | $V) ($V | $T);
($LVT | $T) $T;

# UAX #29 GB9: no break before Extend, unless preceded by Control, CR or LF.
[^$Control $CR $LF] $Extend;

# UAX #29 GB9a / GB9b: no break before SpacingMark, none after Prepend.
[^$Control $CR $LF] $SpacingMark;
$Prepend [^$Control $CR $LF];


## -------------------------------------------------

#
#  Reverse rules. Each rule is the corresponding forward rule with its
#  elements written in the opposite order.
#
!!reverse;
$LF $CR;
($BengaliLetter? $BengaliSignVirama)+ $BengaliLetter;
($GujaratiLetter? $GujaratiSignVirama)+ $GujaratiLetter;
($DevanagariLetter? $DevanagariSignVirama)+ $DevanagariLetter;
($KannadaLetter? $KannadaSignVirama)+ $KannadaLetter;
($MalayalamLetter? $MalayalamSignVirama)+ $MalayalamLetter;
($OriyaLetter? $OriyaSignVirama)+ $OriyaLetter;
($GurmukhiLetter? $GurmukhiSignVirama)+ $GurmukhiLetter;
($TamilLetter? $TamilSignVirama)+ $TamilLetter;
($TeluguLetter? $TeluguSignVirama)+ $TeluguLetter;
($L | $V | $LV | $LVT) $L;
($V | $T) ($LV | $V);
$T ($LVT | $T);

$Extend      [^$Control $CR $LF];
$SpacingMark [^$Control $CR $LF];
[^$Control $CR $LF] $Prepend;


## -------------------------------------------------

#  This section contains no rules.
!!safe_reverse;


## -------------------------------------------------

#  This section contains no rules.
!!safe_forward;
