xref: /trunk/main/soltools/cpp/_tokens.c (revision 7ce20373)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <string.h>
25 #include <ctype.h>
26 #if (defined(_WIN32) || defined(_MSDOS) || defined(__IBMC__))
27 #include <io.h>
28 #else
29 #include <unistd.h>
30 #endif
31 #include "cpp.h"
32 
33 
/* Output buffer: puttokens() appends token text here and writes out the
 * first OBS bytes whenever at least that much has accumulated. */
34 static char wbuf[4 * OBS];
/* Next free position inside wbuf. */
35 static char *wbp = wbuf;
/* Non-zero after puttokens() has written an "extern" token while converting;
 * the token that follows is then copied without conversion. */
36 static int EBCDIC_ExternTokenDetected = 0;
/* Non-zero once puttokens() has seen the __ToLatin1__ keyword; from then on
 * string and character constants go through memcpy_EBCDIC(). */
37 static int EBCDIC_StartTokenDetected = 0;
38 
/*
 * 256-entry byte translation table, indexed by a character code.
 * memcpy_EBCDIC() looks up every character of a string or character
 * constant here and emits the table value as a hexadecimal escape.
 * Per the comments in puttokens() this serves the EBCDIC to ANSI
 * conversion requested with Aflag.
 */
39 unsigned char toLatin1[256] =
40 {
41     0x00, 0x01, 0x02, 0x03, 0x9c, 0x09, 0x86, 0x7f, 0x97, 0x8d,
42     0x8e, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13,
43     0x9d, 0x0a, 0x08, 0x87, 0x18, 0x19, 0x92, 0x8f, 0x1c, 0x1d,
44     0x1e, 0x1f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x17, 0x1b,
45     0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x05, 0x06, 0x07, 0x90, 0x91,
46     0x16, 0x93, 0x94, 0x95, 0x96, 0x04, 0x98, 0x99, 0x9a, 0x9b,
47     0x14, 0x15, 0x9e, 0x1a, 0x20, 0xa0, 0xe2, 0xe4, 0xe0, 0xe1,
48     0xe3, 0xe5, 0xe7, 0xf1, 0xa2, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
49     0x26, 0xe9, 0xea, 0xeb, 0xe8, 0xed, 0xee, 0xef, 0xec, 0xdf,
50     0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e, 0x2d, 0x2f, 0xc2, 0xc4,
51     0xc0, 0xc1, 0xc3, 0xc5, 0xc7, 0xd1, 0xa6, 0x2c, 0x25, 0x5f,
52     0x3e, 0x3f, 0xf8, 0xc9, 0xca, 0xcb, 0xc8, 0xcd, 0xce, 0xcf,
53     0xcc, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,
54     0xd8, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
55     0xab, 0xbb, 0xf0, 0xfd, 0xfe, 0xb1, 0xb0, 0x6a, 0x6b, 0x6c,
56     0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0xaa, 0xba, 0xe6, 0xb8,
57     0xc6, 0xa4, 0xb5, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
58     0x79, 0x7a, 0xa1, 0xbf, 0xd0, 0x5b, 0xde, 0xae, 0xac, 0xa3,
59     0xa5, 0xb7, 0xa9, 0xa7, 0xb6, 0xbc, 0xbd, 0xbe, 0xdd, 0xa8,
60     0xaf, 0x5d, 0xb4, 0xd7, 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45,
61     0x46, 0x47, 0x48, 0x49, 0xad, 0xf4, 0xf6, 0xf2, 0xf3, 0xf5,
62     0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52,
63     0xb9, 0xfb, 0xfc, 0xf9, 0xfa, 0xff, 0x5c, 0xf7, 0x53, 0x54,
64     0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0xb2, 0xd4, 0xd6, 0xd2,
65     0xd3, 0xd5, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
66     0x38, 0x39, 0xb3, 0xdb, 0xdc, 0xd9, 0xda, 0x9f
67 };
68 
/* sprintf format used by memcpy_EBCDIC(): a backslash, 'x', then the value in hex. */
69 #define MASK    "\\x%x"
70 
71 int
memcpy_EBCDIC(char * pwbuf,uchar * p,int len)72     memcpy_EBCDIC( char * pwbuf, uchar *p, int len )
73 {
74     int currpos = 0;
75     int processedchars = 0;
76 
77     if( len == 0 )
78         return 0;
79 
80     if( len == 1 )
81     {
82         *pwbuf = *p;
83         return 1;
84     }
85 
86     /* copy spaces until " or ' */
87     while( (p[ processedchars ] != '\"') && (p[ processedchars ] != '\'') )
88         pwbuf[ currpos++ ] = p[ processedchars++ ];
89 
90     /* copy first " or ' */
91     pwbuf[ currpos++ ] = p[ processedchars++ ];
92 
93     /* convert all characters until " or ' */
94     while( processedchars < (len - 1) )
95     {
96         if( p[ processedchars ] == '\\' )
97         {
98             switch( p[ ++processedchars ] )
99             {
100                 case 'n':
101                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\n'] );
102                     processedchars++;
103                     break;
104 
105                 case 't':
106                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\t'] );
107                     processedchars++;
108                     break;
109 
110                 case 'v':
111                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\v'] );
112                     processedchars++;
113                     break;
114 
115                 case 'b':
116                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\b'] );
117                     processedchars++;
118                     break;
119 
120                 case 'r':
121                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\r'] );
122                     processedchars++;
123                     break;
124 
125                 case 'f':
126                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\f'] );
127                     processedchars++;
128                     break;
129 
130                 case 'a':
131                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\a'] );
132                     processedchars++;
133                     break;
134 
135                 case '\\':
136                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\\'] );
137                     processedchars++;
138                     break;
139 
140                 case '?':
141                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\?'] );
142                     processedchars++;
143                     break;
144 
145                 case '\'':
146                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\''] );
147                     processedchars++;
148                     break;
149 
150                 case '"':
151                     currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1['\"'] );
152                     processedchars++;
153                     break;
154 
155                 /* octal coded character? -> copy */
156                 case '0':
157                 case '1':
158                 case '2':
159                 case '3':
160                 case '4':
161                 case '5':
162                 case '6':
163                 case '7':
164                     {
165                     int startpos = currpos;
166 
167                     pwbuf[ currpos++ ] = '\\';
168 
169                     while( p[ processedchars ] >= '0' && p[ processedchars ] <= '7' && (currpos < startpos + 4) )
170                           pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ];
171                     break;
172                     }
173 
174                 /* hex coded character? -> copy */
175                 case 'x':
176                 case 'X':
177                     {
178                     int startpos = currpos;
179 
180                     pwbuf[ currpos++ ] = '\\';
181                     pwbuf[ currpos++ ] = 'x';
182                     processedchars++;
183 
184                     while( isxdigit( p[ processedchars ] ) && (currpos < startpos + 4) )
185                           pwbuf[ currpos++ ] = (unsigned char)p[ processedchars++ ];
186                     break;
187                     }
188 
189             }
190         }
191         else
192             currpos += sprintf( &pwbuf[ currpos ], MASK, toLatin1[p[ processedchars++ ]] );
193 
194     }
195 
196     /* copy last " or ' */
197     pwbuf[ currpos++ ] = p[ processedchars ];
198 
199     return currpos;
200 }
201 
202 void
maketokenrow(int size,Tokenrow * trp)203     maketokenrow(int size, Tokenrow * trp)
204 {
205     trp->max = size;
206     if (size > 0)
207         trp->bp = (Token *) domalloc(size * sizeof(Token));
208     else
209         trp->bp = NULL;
210     trp->tp = trp->bp;
211     trp->lp = trp->bp;
212 }
213 
214 Token *
growtokenrow(Tokenrow * trp)215     growtokenrow(Tokenrow * trp)
216 {
217     int ncur = trp->tp - trp->bp;
218     int nlast = trp->lp - trp->bp;
219 
220     trp->max = 3 * trp->max / 2 + 1;
221     trp->bp = (Token *) realloc(trp->bp, trp->max * sizeof(Token));
222     trp->lp = &trp->bp[nlast];
223     trp->tp = &trp->bp[ncur];
224     return trp->lp;
225 }
226 
227 /*
228  * Compare a row of tokens, ignoring the content of WS; return !=0 if different
229  */
230 int
comparetokens(Tokenrow * tr1,Tokenrow * tr2)231     comparetokens(Tokenrow * tr1, Tokenrow * tr2)
232 {
233     Token *tp1, *tp2;
234 
235     tp1 = tr1->tp;
236     tp2 = tr2->tp;
237     if (tr1->lp - tp1 != tr2->lp - tp2)
238         return 1;
239     for (; tp1 < tr1->lp; tp1++, tp2++)
240     {
241         if (tp1->type != tp2->type
242             || (tp1->wslen == 0) != (tp2->wslen == 0)
243             || tp1->len != tp2->len
244             || strncmp((char *) tp1->t, (char *) tp2->t, tp1->len) != 0)
245             return 1;
246     }
247     return 0;
248 }
249 
250 /*
251  * replace ntok tokens starting at dtr->tp with the contents of str.
252  * tp ends up pointing just beyond the replacement.
253  * Canonical whitespace is assured on each side.
254  */
255 void
insertrow(Tokenrow * dtr,int ntok,Tokenrow * str)256     insertrow(Tokenrow * dtr, int ntok, Tokenrow * str)
257 {
258     int nrtok = rowlen(str);
259 
260     dtr->tp += ntok;
261     adjustrow(dtr, nrtok - ntok);
262     dtr->tp -= ntok;
263     movetokenrow(dtr, str);
264     dtr->tp += nrtok;
265 }
266 
267 /*
268  * make sure there is WS before trp->tp, if tokens might merge in the output
269  */
270 void
makespace(Tokenrow * trp,Token * ntp)271     makespace(Tokenrow * trp, Token * ntp)
272 {
273     uchar *tt;
274     Token *tp = trp->tp;
275 
276     if (tp >= trp->lp)
277         return;
278 
279     if (ntp->wslen)
280     {
281         tt = newstring(tp->t, tp->len, ntp->wslen);
282         strncpy((char *)tt, (char *)ntp->t - ntp->wslen, ntp->wslen);
283         tp->t = tt + ntp->wslen;
284         tp->wslen = ntp->wslen;
285         tp->flag |= XPWS;
286     }
287 }
288 
289 /*
290  * Copy an entire tokenrow into another, at tp.
291  * It is assumed that there is enough space.
292  *  Not strictly conforming.
293  */
294 void
movetokenrow(Tokenrow * dtr,Tokenrow * str)295     movetokenrow(Tokenrow * dtr, Tokenrow * str)
296 {
297     int nby;
298 
299     /* nby = sizeof(Token) * (str->lp - str->bp); */
300     nby = (char *) str->lp - (char *) str->bp;
301     memmove(dtr->tp, str->bp, nby);
302 }
303 
304 /*
305  * Move the tokens in a row, starting at tr->tp, rightward by nt tokens;
306  * nt may be negative (left move).
307  * The row may need to be grown.
308  * Non-strictly conforming because of the (char *), but easily fixed
309  */
310 void
adjustrow(Tokenrow * trp,int nt)311     adjustrow(Tokenrow * trp, int nt)
312 {
313     int nby, size;
314 
315     if (nt == 0)
316         return;
317     size = (trp->lp - trp->bp) + nt;
318     while (size > trp->max)
319         growtokenrow(trp);
320     /* nby = sizeof(Token) * (trp->lp - trp->tp); */
321     nby = (char *) trp->lp - (char *) trp->tp;
322     if (nby)
323         memmove(trp->tp + nt, trp->tp, nby);
324     trp->lp += nt;
325 }
326 
327 /*
328  * Copy a row of tokens into the destination holder, allocating
329  * the space for the contents.  Return the destination.
330  */
331 Tokenrow *
copytokenrow(Tokenrow * dtr,Tokenrow * str)332     copytokenrow(Tokenrow * dtr, Tokenrow * str)
333 {
334     int len = rowlen(str);
335 
336     maketokenrow(len, dtr);
337     movetokenrow(dtr, str);
338     dtr->lp += len;
339     return dtr;
340 }
341 
342 /*
343  * Produce a copy of a row of tokens.  Start at trp->tp.
344  * The value strings are copied as well.  The first token
345  * has WS available.
346  */
347 Tokenrow *
normtokenrow(Tokenrow * trp)348     normtokenrow(Tokenrow * trp)
349 {
350     Token *tp;
351     Tokenrow *ntrp = new(Tokenrow);
352     int len;
353 
354     len = trp->lp - trp->tp;
355     if (len <= 0)
356         len = 1;
357     maketokenrow(len, ntrp);
358     for (tp = trp->tp; tp < trp->lp; tp++)
359     {
360         *ntrp->lp = *tp;
361         if (tp->len)
362         {
363             ntrp->lp->t = newstring(tp->t, tp->len, 1);
364             *ntrp->lp->t++ = ' ';
365             if (tp->wslen)
366                 ntrp->lp->wslen = 1;
367         }
368         ntrp->lp++;
369     }
370     if (ntrp->lp > ntrp->bp)
371         ntrp->bp->wslen = 0;
372     return ntrp;
373 }
374 
375 /*
376  * Debugging
377  */
378 void
peektokens(Tokenrow * trp,char * str)379     peektokens(Tokenrow * trp, char *str)
380 {
381     Token *tp;
382 
383     tp = trp->tp;
384     flushout();
385     if (str)
386         fprintf(stderr, "%s ", str);
387     if (tp < trp->bp || tp > trp->lp)
388         fprintf(stderr, "(tp offset %ld) ", (long int) (tp - trp->bp));
389     for (tp = trp->bp; tp < trp->lp && tp < trp->bp + 32; tp++)
390     {
391         if (tp->type != NL)
392         {
393             int c = tp->t[tp->len];
394 
395             tp->t[tp->len] = 0;
396             fprintf(stderr, "%s", tp->t);
397             tp->t[tp->len] = (uchar) c;
398         }
399         fprintf(stderr, tp == trp->tp ? "{%x*} " : "{%x} ", tp->type);
400     }
401     fprintf(stderr, "\n");
402     fflush(stderr);
403 }
404 
405 void
puttokens(Tokenrow * trp)406     puttokens(Tokenrow * trp)
407 {
408     Token *tp;
409     int len;
410     uchar *p;
411 
412     if (Vflag)
413         peektokens(trp, "");
414     tp = trp->bp;
415     for (; tp < trp->lp; tp++)
416     {
417         if (tp->type != NL)
418         {
419             len = tp->len + tp->wslen;
420             p = tp->t - tp->wslen;
421 
422 			/* add parameter check to delete operator? */
423 			if( Dflag )
424 			{
425 				if( (tp->type == NAME) && (strncmp( (char*)p, "delete", len ) == 0) )
426 				{
427 					Token* ntp = tp;
428 					ntp++;
429 
430 					if( ntp->type == NAME )
431 					{
432 						uchar* np = ntp->t - ntp->wslen;
433 			            int nlen = ntp->len + ntp->wslen;
434 
435 						memcpy(wbp, "if(", 3 );
436  			            wbp += 4;
437 						memcpy(wbp, np, nlen );
438  			            wbp += nlen;
439 						memcpy(wbp, ")", 1 );
440  			            wbp++;
441 
442                         memcpy(wbp, p, len);
443 					}
444 				}
445 			}
446 
447             /* EBCDIC to ANSI conversion requested? */
448             if( Aflag )
449             {
450                 /* keyword __ToLatin1__ found? -> do conversion! */
451                 if( EBCDIC_StartTokenDetected )
452                 {
453                     /* previous token was 'extern'? -> don't convert current token! */
454                     if( EBCDIC_ExternTokenDetected )
455                     {
456                         EBCDIC_ExternTokenDetected = 0;
457                         memcpy(wbp, p, len);
458                     }
459                     else
460                     {
461                         /* current token is keyword 'extern'? -> don't convert following token! */
462                         if( (tp->wslen == 0) && (strncmp( (char*)p, "extern", len ) == 0) )
463                         {
464                             EBCDIC_ExternTokenDetected = 1;
465                             memcpy(wbp, p, len);
466                         }
467                         else
468                         {
469                             /* token is string or char? -> process EBCDIC to ANSI conversion */
470                             if ((tp->type == STRING) || (tp->type == CCON))
471                                 len = memcpy_EBCDIC(wbp,  p, len);
472                             else
473                                 memcpy(wbp, p, len);
474                         }
475                     }
476                 }
477                 else
478                     /* keyword __ToLatin1__ found? -> don't copy keyword and start conversion */
479                     if( (tp->type == NAME) && (strncmp( (char*)p, "__ToLatin1__", len) == 0) )
480                     {
481                         EBCDIC_StartTokenDetected = 1;
482                         len = 0;
483                     }
484                     else
485                         memcpy(wbp, p, len);
486             }
487             else
488                 memcpy(wbp, p, len);
489 
490             wbp += len;
491         }
492         else
493             *wbp++ = '\n';
494 
495         if (wbp >= &wbuf[OBS])
496         {
497             if ( write(1, wbuf, OBS) != -1 ) {
498             if (wbp > &wbuf[OBS])
499                 memcpy(wbuf, wbuf + OBS, wbp - &wbuf[OBS]);
500             wbp -= OBS;
501 	    }
502 		else exit(1);
503         }
504     }
505     trp->tp = tp;
506     if (cursource->fd == 0)
507         flushout();
508 }
509 
510 void
flushout(void)511     flushout(void)
512 {
513     if (wbp > wbuf)
514     {
515         if ( write(1, wbuf, wbp - wbuf) != -1)
516         	wbp = wbuf;
517 	else
518 		exit(1);
519     }
520 }
521 
522 /*
523  * turn a row into just a newline
524  */
525 void
setempty(Tokenrow * trp)526     setempty(Tokenrow * trp)
527 {
528     trp->tp = trp->bp;
529     trp->lp = trp->bp + 1;
530     *trp->bp = nltoken;
531 }
532 
/*
 * Store the decimal representation of n at p, without a terminating NUL.
 * Negative values are written with a leading '-'.
 * Returns the position just behind the last character stored.
 */
char *
outnum(char *p, int n)
{
    char digits[3 * sizeof(int) + 1];   /* ample room for every int value */
    int ndigits = 0;
    unsigned int magnitude;

    if (n < 0)
    {
        *p++ = '-';
        /* negate in unsigned arithmetic so that INT_MIN is handled too */
        magnitude = 0u - (unsigned int) n;
    }
    else
        magnitude = (unsigned int) n;

    /* collect the digits least significant first ... */
    do
    {
        digits[ndigits++] = (char) ('0' + magnitude % 10);
        magnitude /= 10;
    } while (magnitude != 0);

    /* ... and emit them in reading order */
    while (ndigits > 0)
        *p++ = digits[--ndigits];

    return p;
}
544 
545 /*
546  * allocate and initialize a new string from s, of length l, at offset o
547  * Null terminated.
548  */
549 uchar *
newstring(uchar * s,int l,int o)550     newstring(uchar * s, int l, int o)
551 {
552     uchar *ns = (uchar *) domalloc(l + o + 1);
553 
554     ns[l + o] = '\0';
555     return (uchar *) strncpy((char *) ns + o, (char *) s, l) - o;
556 }
557