1#!/usr/bin/gawk -f
2# Usage: gawk -f currency-check.awk *.xml
3# Check any
4# <FormatCode>...[$xxx-...]...</FormatCode>
5# against every
6# <CurrencySymbol>xxx</CurrencySymbol>
7# definition of the same XML file and output symbols if no match was found.
8# For formatindex="12" to formatindex="15" and for formatindex="17" it is
9# checked if the used currency symbol is the usedInCompatibleFormatCodes
10# currency symbol as it is needed by the number formatter.
11# Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
12# is used instead of a real currency symbol.
13# Author: Eike Rathke <er@openoffice.org>
14
# No input file has been seen yet; an empty name keeps the first
# file-change rule from running checkIt() on nothing.
BEGIN { file = "" }
18
19
# Fires on the first record of every new input file: report on the file just
# finished (if any), then start over with clean per-file state.
FILENAME != file {
    if ( file )
        checkIt()
    file = FILENAME

    # Counters and flags.
    line = nFormats = nCurrencies = 0
    bFormatAuto = crlf = 0

    # [CURRENCY] placeholder replacement and ref-locale bookkeeping.
    sReplaceFrom = sReplaceTo = sMatchReplace = ""
    sRefCurrencyFromLocale = ""
}
34
# Main per-record rule. Classifies each line of the current locale file:
#   <LC_FORMAT ...>      remembers the replaceFrom/replaceTo placeholder pair
#   <FormatElement ...>  notes whether a CURRENCY format has one of the
#                        format indices 12-15 or 17
#   <FormatCode>...      collects every currency symbol used in [$xxx-...]
#   <LC_CURRENCY ref=..> reads the currency definitions of the referenced
#                        locale file instead
#   anything else        is handed to getCurrencyParams()
{
    ++line
    # If run under Unix a CrLf spoils ...$ line end checks, so DOS line
    # endings are rejected outright. END skips checkIt() once crlf is set.
    if ( /\r$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Allow leading blanks like the literal <FormatCode> pattern
            # below does, otherwise indented lines are never matched.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^replaceTo="/ )
            {
                # Characters to drop: the 11 of replaceTo=" plus the closing
                # quote, plus a trailing > or /> if the tag ends in this field.
                nStrip = 12
                if ( $j ~ />$/ )
                    ++nStrip
                if ( $j ~ /\/>$/ )
                    ++nStrip
                sReplaceTo = substr( $j, 12, length($j)-nStrip )
            }
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        if ( $0 ~ /usage="CURRENCY"/ )
        {
            if ( $0 ~ /formatindex="1[23457]"/ )
                bFormatAuto = 1
            else
                bFormatAuto = 0
        }
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        split( $0, arr, /<|>/ )
        # arr[3] is the element content; cutting it at every "[$" and
        # "-hex]" leaves the currency symbols between the other pieces.
        nParts = split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        # Walk by index so the symbols are recorded in the order they appear;
        # the order of a for-in scan is unspecified.
        for ( j=1; j<=nParts; ++j )
        {
            # Pieces containing #, 0 or [NatNum are number format code,
            # not a currency symbol.
            if ( code[j] && code[j] !~ /#|0|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[j]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
            {
                # Same stripping as for replaceTo, with the 5 chars of ref="
                nStrip = 6
                if ( $j ~ />$/ )
                    ++nStrip
                if ( $j ~ /\/>$/ )
                    ++nStrip
                locale = substr( $j, 6, length($j)-nStrip )
                sRefCurrencyFromLocale = file
                oldfile = file
                oldline = line
                # getline <file overwrites $0 and NF; keep the current record
                # so the enclosing field loop continues on the right line.
                sSavedRecord = $0
                file = locale ".xml"
                line = 0
                while ( (getline <file) > 0 )
                {
                    ++line
                    getCurrencyParams()
                }
                close( file )
                if ( !line )
                    print "ref locale not available: " file \
                        " (from " oldfile " line " oldline ")"
                $0 = sSavedRecord
                file = oldfile
                line = oldline
                sRefCurrencyFromLocale = ""
            }
        }
    }
    else
        getCurrencyParams()
}
128
129
# Report on the last input file, unless processing was aborted because of
# DOS line endings or no file was read at all.
END {
    if ( !crlf && file )
        checkIt()
}
134
135
# Returns "<file> line <line>" for the current record, with a
# " (referenced from <locale file>)" suffix while a ref locale is being read.
function currencyWhere(    sWhere ) {
    sWhere = file " line " line
    if ( sRefCurrencyFromLocale )
        sWhere = sWhere " (referenced from " sRefCurrencyFromLocale ")"
    return sWhere
}

# Records the currency definition data found on the current record ($0) in
# the per-currency arrays at index nCurrencies:
#   <Currency ...>    SymbolDefault / SymbolCompati flags
#   <CurrencyID>      IDs / IDLine
#   <CurrencySymbol>  Symbols / SymbolLine
#   <BankSymbol>      BankSymbols / BankSymbolLine
#   </Currency>       finishes the entry by incrementing nCurrencies
# Assumes that each element is on a line on its own!
# The extra parameter arr is a local scratch array; callers pass nothing.
function getCurrencyParams(    arr ) {
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        if ( $0 ~ /default="true"/ )
            SymbolDefault[nCurrencies] = 1
        else
            SymbolDefault[nCurrencies] = 0
        if ( $0 ~ /usedInCompatibleFormatCodes="true"/ )
            SymbolCompati[nCurrencies] = 1
        else
            SymbolCompati[nCurrencies] = 0
        # The arrays are reused for every input file. Clear this slot so an
        # element missing here does not inherit a value from an earlier file.
        IDs[nCurrencies] = ""
        IDLine[nCurrencies] = ""
        Symbols[nCurrencies] = ""
        SymbolLine[nCurrencies] = ""
        BankSymbols[nCurrencies] = ""
        BankSymbolLine[nCurrencies] = ""
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        # Splitting at < and > leaves the element content in arr[3].
        split( $0, arr, /<|>/ )
        IDLine[nCurrencies] = currencyWhere()
        IDs[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, arr, /<|>/ )
        SymbolLine[nCurrencies] = currencyWhere()
        Symbols[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, arr, /<|>/ )
        BankSymbolLine[nCurrencies] = currencyWhere()
        BankSymbols[nCurrencies] = arr[3]
    }
    else if ( $1 ~ /^<\/Currency>/ )
    {
        ++nCurrencies
    }
}
184
185
# Reports all problems found in the file just read (global `file`):
#   - format codes using the generic currency symbol, an undefined symbol, or
#     (for formats flagged in FormatAuto) a symbol that is not the
#     usedInCompatibleFormatCodes one; if any of these occur, all defined
#     currency symbols are listed for reference;
#   - otherwise, generic symbols in definitions and missing or duplicate
#     default / usedInCompatibleFormatCodes currencies;
#   - CurrencyIDs that do not look like ISO 4217 codes or differ from the
#     BankSymbol.
# Prints a blank separator line if anything was reported.
# All parameters are local working variables; callers pass nothing.
function checkIt(    j, bad, state, bDef, bHasDefault, bHasCompati, sGeneric ) {
    # Generic currency symbol, UTF-8 bytes 0xC2 0xA4.
    sGeneric = "\xc2\xa4"
    bad = 0
    for ( j=0; j<nFormats; ++j )
    {
        state = FormatInSymbol( Formats[j] )
        if ( Formats[j] == sGeneric )
        {
            bad = 1
            print "    bad: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "'   (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            bad = 1
            print "badauto: `" Formats[j] "'   (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        # Something is wrong with the format codes: list every defined
        # currency symbol so the offending code can be compared against them.
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == sGeneric )
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "'   (" SymbolLine[j] ")"
        }
    }
    else
    {
        # Format codes are fine: verify the definitions themselves.
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == sGeneric )
            {
                bad = 1
                print "def bad: `" Symbols[j] "'   (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "'   (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print "  no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print "  no compati: (" file ")"
        }
    }
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents. The file
        # name is matched at the end of the path so the exception also holds
        # when the file is given with a directory prefix.
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
              && !(file ~ /(^|\/)zh_MO\.xml$/ && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "'   (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                  "'   (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    if ( bad )
        print ""
}
298
299
# Looks up a currency symbol used in a format code among the first
# nCurrencies entries of Symbols[].
#   format  the symbol string to look for
# Returns 2 if a currency with that symbol is flagged in SymbolCompati[],
#         1 if the symbol is defined but only by currencies without the flag,
#         0 if no currency defines it.
# nSym and state are local working variables; callers pass only format, so
# the function no longer overwrites globals of the same names.
function FormatInSymbol( format,    nSym, state ) {
    state = 0
    for ( nSym=0; nSym<nCurrencies; ++nSym )
    {
        if ( format != Symbols[nSym] )
            continue
        # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
        # for AZM and AZN), continue to lookup if the match isn't the
        # compatible one.
        if ( SymbolCompati[nSym] )
            return 2
        state = 1
    }
    return state
}
317
318# vim: ts=4 sw=4 expandtab
319