#!/usr/bin/gawk -f
# Usage: gawk -f currency-check.awk *.xml
# Check any
# <FormatCode>...[$xxx-...]...</FormatCode>
# against every
# <CurrencySymbol>xxx</CurrencySymbol>
# definition of the same XML file and output symbols if no match was found.
# For formatindex="12" to formatindex="15" and for formatindex="17" it is
# checked if the used currency symbol is the usedInCompatibleFormatCodes
# currency symbol as it is needed by the number formatter.
# Also generates output if the generic currency symbol (UTF8 string 0xC2A4)
# is used instead of a real currency symbol.
# Author: Eike Rathke <er@openoffice.org>

BEGIN {
    file = ""
}


# A new input file begins: report on the previous file (if any) and reset all
# per-file state.
file != FILENAME {
    if ( file )
        checkIt()
    file = FILENAME
    line = 0
    nFormats = 0
    nCurrencies = 0
    bFormatAuto = 0
    sReplaceFrom = ""
    sReplaceTo = ""
    sMatchReplace = ""
    sRefCurrencyFromLocale = ""
    crlf = 0
    # Resetting the counters alone is not enough: an element missing in this
    # file (e.g. a <Currency> without <BankSymbol>) would otherwise pick up
    # the value stored at the same index by the previous file.
    delete FormatLine
    delete FormatAuto
    delete Formats
    delete SymbolDefault
    delete SymbolCompati
    delete IDLine
    delete IDs
    delete SymbolLine
    delete Symbols
    delete BankSymbolLine
    delete BankSymbols
}

# Main rule: classify every input line and collect format codes and currency
# definitions of the current file.
{
    ++line
    # If run under Unix a CrLf spoils ...$ line end checks. DOS line endings
    # are boo anyways.
    if ( /\x0D$/ )
    {
        print "Error: not Unix line ending in line " line
        crlf = 1
        # exit still runs the END rule; crlf keeps it from calling checkIt().
        exit(1)
    }
    if ( $1 ~ /^<LC_FORMAT(>|$)/ )
    {
        if ( $0 ~ /replaceFrom="\[CURRENCY\]"/ )
        {
            sReplaceFrom = "\\[CURRENCY\\]"
            # Tolerate leading blanks like every other element test does, so
            # that indented format codes using the placeholder are checked too.
            sMatchReplace = "^[[:blank:]]*<FormatCode>.*" sReplaceFrom
        }
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^replaceTo="/ )
                sReplaceTo = attributeValue( $j, 11 )   # length of replaceTo="
        }
    }
    else if ( $1 ~ /^<FormatElement(>|$)/ )
    {
        if ( $0 ~ /usage="CURRENCY"/ )
        {
            # Format indices 12-15 and 17 have to use the compatible symbol.
            if ( $0 ~ /formatindex="1[23457]"/ )
                bFormatAuto = 1
            else
                bFormatAuto = 0
        }
    }
    else if ( $0 ~ /^[[:blank:]]*<FormatCode>.*\[\$.*-[0-9a-fA-F]+\]/ ||
            (sMatchReplace && $0 ~ sMatchReplace ) )
    {
        if ( sReplaceFrom )
            gsub( sReplaceFrom, sReplaceTo )
        # arr[3] is the text between <FormatCode> and </FormatCode>.
        split( $0, arr, /<|>/ )
        # Cut the code at every "[$" and "-hex]" so that the currency symbols
        # end up as pieces of their own.
        split( arr[3], code, /(\[\$)|(-[0-9a-fA-F]+\])/ )
        for ( j in code )
        {
            # Pieces containing '#', '0' or "[NatNum" are number format parts,
            # not currency symbols. ('#' needs no escaping in a regexp.)
            if ( code[j] && code[j] !~ /#|0|\[NatNum/ )
            {
                FormatLine[nFormats] = file " line " line
                FormatAuto[nFormats] = bFormatAuto
                Formats[nFormats++] = code[j]
            }
        }
        bFormatAuto = 0
    }
    else if ( $1 ~ /^<LC_CURRENCY(>|$)/ )
    {
        for ( j=2; j<=NF; ++j )
        {
            if ( $j ~ /^ref="/ )
            {
                locale = attributeValue( $j, 5 )        # length of ref="
                sRefCurrencyFromLocale = file
                oldfile = file
                oldline = line
                # getline <file overwrites $0 and NF; remember the current
                # record so the surrounding field loop keeps scanning the
                # <LC_CURRENCY> line and not the last line of the ref file.
                oldrecord = $0
                file = locale ".xml"
                line = 0
                while ( (getline <file) > 0 )
                {
                    ++line
                    getCurrencyParams()
                }
                close( file )
                if ( !line )
                    print "ref locale not available: " file \
                        " (from " oldfile " line " oldline ")"
                file = oldfile
                line = oldline
                sRefCurrencyFromLocale = ""
                $0 = oldrecord
            }
        }
    }
    else
        getCurrencyParams()
}


END {
    if ( file && !crlf )
        checkIt()
}


# Returns the quoted value of an attribute field such as ref="en_US".
# field:   the whitespace separated field holding name="value"
# nameLen: length of the leading name=" part
# A trailing > or /> closing the element in the same field is not part of the
# value.
function attributeValue( field, nameLen,    trailing ) {
    trailing = 1        # the closing quote
    if ( field ~ />$/ )
        ++trailing
    if ( field ~ /\/>$/ )
        ++trailing
    return substr( field, nameLen+1, length(field)-nameLen-trailing )
}


# Returns "<file> line <n>" for the line currently being parsed, with a note
# naming the referencing file while a ref'ed locale is being read.
function currentPosition() {
    if ( sRefCurrencyFromLocale )
        return file " line " line \
            " (referenced from " sRefCurrencyFromLocale ")"
    return file " line " line
}


# Collects the data of <Currency> elements from the current record ($0) into
# the SymbolDefault, SymbolCompati, IDs, Symbols and BankSymbols arrays (plus
# their *Line position arrays), indexed by nCurrencies.
function getCurrencyParams(    arr ) {
    # Assumes that each element is on a line on its own!
    if ( $1 ~ /^<Currency(>|$)/ )
    {
        if ( $0 ~ /default="true"/ )
            SymbolDefault[nCurrencies] = 1
        else
            SymbolDefault[nCurrencies] = 0
        if ( $0 ~ /usedInCompatibleFormatCodes="true"/ )
            SymbolCompati[nCurrencies] = 1
        else
            SymbolCompati[nCurrencies] = 0
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencyID>/ )
    {
        # arr[3] is the element content in all three cases below.
        split( $0, arr, /<|>/ )
        IDLine[nCurrencies] = currentPosition()
        IDs[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<CurrencySymbol>/ )
    {
        split( $0, arr, /<|>/ )
        SymbolLine[nCurrencies] = currentPosition()
        Symbols[nCurrencies] = arr[3]
    }
    else if ( $0 ~ /^[[:blank:]]*<BankSymbol>/ )
    {
        split( $0, arr, /<|>/ )
        BankSymbolLine[nCurrencies] = currentPosition()
        BankSymbols[nCurrencies] = arr[3]
    }
    else if ( $1 ~ /^<\/Currency>/ )
    {
        # The element is complete, the next one goes to the next index.
        ++nCurrencies
    }
}


# Checks the format codes collected for the current file against its currency
# definitions and prints every finding, followed by an empty line if there was
# any. The names after the gap in the parameter list are local variables.
function checkIt(    j, bad, state, bDef, bHasDefault, bHasCompati ) {
    bad = 0
    for ( j=0; j<nFormats; ++j )
    {
        state = FormatInSymbol( Formats[j] )
        if ( Formats[j] == "\xc2\xa4" )
        {
            # The generic currency symbol is never acceptable.
            bad = 1
            print " bad: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( state == 0 )
        {
            bad = 1
            print "unknown: `" Formats[j] "' (" FormatLine[j] ")"
        }
        else if ( FormatAuto[j] && state < 2 )
        {
            # Known symbol, but not the usedInCompatibleFormatCodes one.
            bad = 1
            print "badauto: `" Formats[j] "' (" FormatLine[j] ")"
        }
    }
    if ( bad )
    {
        # Something was wrong with a format code: list all symbols that are
        # defined to ease finding the culprit.
        for ( j=0; j<nCurrencies; ++j )
        {
            bDef = 0
            if ( Symbols[j] == "\xc2\xa4" )
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            if ( SymbolDefault[j] )
            {
                bDef = 1
                print "default: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolCompati[j] )
            {
                bDef = 1
                print "compati: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( !bDef )
                print "defined: `" Symbols[j] "' (" SymbolLine[j] ")"
        }
    }
    else
    {
        # Format codes are fine, check the definitions themselves: exactly
        # one default and exactly one compatible currency are expected.
        bHasDefault = 0
        bHasCompati = 0
        for ( j=0; j<nCurrencies; ++j )
        {
            if ( Symbols[j] == "\xc2\xa4" )
            {
                bad = 1
                print "def bad: `" Symbols[j] "' (" SymbolLine[j] ")"
            }
            if ( SymbolDefault[j] )
            {
                if ( !bHasDefault )
                    bHasDefault = 1
                else
                {
                    bad = 1
                    print "dupe default: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
            if ( SymbolCompati[j] )
            {
                if ( !bHasCompati )
                    bHasCompati = 1
                else
                {
                    bad = 1
                    print "dupe compati: `" Symbols[j] "' (" SymbolLine[j] ")"
                }
            }
        }
        if ( !bHasDefault )
        {
            bad = 1
            print " no default: (" file ")"
        }
        if ( !bHasCompati )
        {
            bad = 1
            print " no compati: (" file ")"
        }
    }
    for ( j=0; j<nCurrencies; ++j )
    {
        # Check if CurrencyID at least resembles some ISO 4217 code.
        # The only exception is zh_MO that had an erroneous original data set
        # with BankSymbol="P" (stored as ISO code in documents, hence copied to
        # CurrencyID now) and needs that entry for legacy documents.
        # There is a strange bug in gawk 3.1.4 that does a match of [A-Z] on
        # lower case except 'a', regardless of IGNORECASE setting, hence this
        # ugly notation. [[:upper:]] wouldn't be correct since we want only
        # ASCII to match.
        if ( IDs[j] !~ /^[ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ][ABCDEFGHIJKLMNOPQRSTUVWXYZ]$/ \
              && !(file == "zh_MO.xml" && IDs[j] == "P") )
        {
            bad = 1
            print "no ISO 4217 code: `" IDs[j] "' (" IDLine[j] ")"
        }
        # CurrencyID should equal BankSymbol for now.
        if ( IDs[j] != BankSymbols[j] )
        {
            bad = 1
            print "not equal: CurrencyID `" IDs[j] "' != BankSymbol `" BankSymbols[j] \
                "' (" IDLine[j] " and " BankSymbolLine[j] ")"
        }
    }
    if ( bad )
        print ""
}


# Looks up a format code's currency symbol in the defined currency symbols.
# Returns 0 if the symbol is not defined at all, 1 if it is defined, and 2 if
# it is defined by a currency flagged usedInCompatibleFormatCodes.
function FormatInSymbol( format,    state, nSym ) {
    state = 0
    for ( nSym=0; nSym<nCurrencies; ++nSym )
    {
        if ( format == Symbols[nSym] )
        {
            # Two currencies can have the same symbol (e.g. az_AZ.xml 'man.'
            # for AZM and AZN), continue to lookup if the match isn't the
            # compatible one.
            if ( SymbolCompati[nSym] )
                return 2
            else
                state = 1
        }
    }
    return state
}

# vim: ts=4 sw=4 expandtab