xref: /aoo4110/main/sw/source/filter/ww8/dump/msvbasic.cxx (revision b1cdbd2c)
1 /**************************************************************
2  *
3  * Licensed to the Apache Software Foundation (ASF) under one
4  * or more contributor license agreements.  See the NOTICE file
5  * distributed with this work for additional information
6  * regarding copyright ownership.  The ASF licenses this file
7  * to you under the Apache License, Version 2.0 (the
8  * "License"); you may not use this file except in compliance
9  * with the License.  You may obtain a copy of the License at
10  *
11  *   http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing,
14  * software distributed under the License is distributed on an
15  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16  * KIND, either express or implied.  See the License for the
17  * specific language governing permissions and limitations
18  * under the License.
19  *
20  *************************************************************/
21 
22 
23 
24 // MARKER(update_precomp.py): autogen include statement, do not remove
25 #include "precompiled_sw.hxx"
26 
27 
28 #include <string.h> 	// memset(), ...
29 #ifndef UNX
30 #include <io.h> 		// access()
31 #endif
32 #include <msvbasic.hxx>
33 
/* class VBA_Impl:
 * The VBA class provides a set of methods to handle Visual Basic for
 * Applications streams. The constructor is given the root OLE2 stream
 * of the document. Open reads the VBA project file and figures out
 * the number of VBA streams, and the offset of the data within them.
 * Decompress decompresses a particular numbered stream, NoStreams returns
 * the number of streams, and StreamName gives you a stream's name.
 * Decompress will call Output when it has a 4096 byte collection of data
 * to output, and also with the final remainder of data if there is still
 * some left at the end of decompression. Output is virtual to allow custom
 * handling of each chunk of decompressed data, so inherit from this class
 * to do something useful with the data.
 *
 * cmc
 * */
49 
ReadPString(SvStorageStreamRef & xVBAProject)50 sal_uInt8 VBA_Impl::ReadPString(SvStorageStreamRef &xVBAProject)
51 {
52 	sal_uInt16 idlen;
53 	sal_uInt8 type=0;
54 	*xVBAProject >> idlen;
55 	sal_uInt8 out;
56 	int i=0;
57 	if (idlen < 6)
58 	{
59 		type=0;
60 		xVBAProject->SeekRel(-2);
61 		return(type);
62 	}
63 
64 	for(i=0;i<idlen/2;i++)
65 	{
66 		*xVBAProject >> out;
67 		xVBAProject->SeekRel(1);
68 		if (i==2)
69 		{
70 			type=out;
71 			if ((type != 'G') && (type != 'C'))
72 				type=0;
73 			if (type == 0)
74 			{
75 				xVBAProject->SeekRel(-8);
76 				break;
77 			}
78 		}
79 	}
80 
81 
82 	return(type);
83 }
84 
ConfirmFixedOctect(SvStorageStreamRef & xVBAProject)85 void VBA_Impl::ConfirmFixedOctect(SvStorageStreamRef &xVBAProject)
86 {
87 	static const sal_uInt8 stest[8] =
88 		{
89 		0x06, 0x02, 0x01, 0x00, 0x08, 0x02, 0x00, 0x00
90 		};
91 
92 	sal_uInt8 test[8];
93 	xVBAProject->Read(test,8);
94 	if (memcmp(stest,test,8) != 0)
95 		DBG_WARNING("Found a different octect, please report");
96 }
97 
Confirm12Zeros(SvStorageStreamRef & xVBAProject)98 void VBA_Impl::Confirm12Zeros(SvStorageStreamRef &xVBAProject)
99 {
100 	static const sal_uInt8 stest[12]={0};
101 	sal_uInt8 test[12];
102 	xVBAProject->Read(test,12);
103 	if (memcmp(stest,test,12) != 0)
104 		DBG_WARNING("Found a Non Zero block, please report");
105 }
106 
ConfirmHalfWayMarker(SvStorageStreamRef & xVBAProject)107 void VBA_Impl::ConfirmHalfWayMarker(SvStorageStreamRef &xVBAProject)
108 {
109 	static const sal_uInt8 stest[12]={0,0,0,0,0,0,0,0,0,0,1,0};
110 	sal_uInt8 test[12];
111 	xVBAProject->Read(test,12);
112 	if (memcmp(stest,test,12) != 0)
113 		DBG_WARNING("Found a different halfway marker, please report");
114 }
115 
ConfirmFixedMiddle(SvStorageStreamRef & xVBAProject)116 void VBA_Impl::ConfirmFixedMiddle(SvStorageStreamRef &xVBAProject)
117 {
118 	static const sal_uInt8 stest[20] =
119 	{
120 		0x00, 0x00, 0xe1, 0x2e, 0x45, 0x0d, 0x8f, 0xe0,
121 		0x1a, 0x10, 0x85, 0x2e, 0x02, 0x60, 0x8c, 0x4d,
122 		0x0b, 0xb4, 0x00, 0x00
123 	};
124 
125 	sal_uInt8 test[20];
126 	xVBAProject->Read(test,20);
127 	if (memcmp(stest,test,20) != 0)
128 	{
129 		DBG_WARNING("Found a different middle marker, please report");
130 		xVBAProject->SeekRel(-20);
131 	}
132 }
133 
ConfirmFixedMiddle2(SvStorageStreamRef & xVBAProject)134 void VBA_Impl::ConfirmFixedMiddle2(SvStorageStreamRef &xVBAProject)
135 {
136 	static const sal_uInt8 stest[20] =
137 	{
138 		0x00, 0x00, 0x2e, 0xc9, 0x27, 0x8e, 0x64, 0x12,
139 		0x1c, 0x10, 0x8a, 0x2f, 0x04, 0x02, 0x24, 0x00,
140 		0x9c, 0x02, 0x00, 0x00
141 	};
142 
143 	sal_uInt8 test[20];
144 	xVBAProject->Read(test,20);
145 	if (memcmp(stest,test,20) != 0)
146 		{
147 		DBG_WARNING("Found a different middle2 marker, please report");
148 		xVBAProject->SeekRel(-20);
149 		}
150 }
151 
152 
Output(int nLen,const sal_uInt8 * pData)153 void VBA_Impl::Output( int nLen, const sal_uInt8 *pData)
154 {
155 	sVBAString += String( (const sal_Char *)pData, nLen );
156 /*
157 //For debugging purposes
158 	for(int i=0;i<len;i++)
159 		*pOut << data[i];
160 */
161 }
162 
163 
ReadVBAProject(const SvStorageRef & rxVBAStorage)164 int VBA_Impl::ReadVBAProject(const SvStorageRef &rxVBAStorage)
165 	{
166 	SvStorageStreamRef xVBAProject;
167 	xVBAProject = rxVBAStorage->OpenStream(
168 					String::CreateFromAscii( "_VBA_PROJECT" ),
169 					STREAM_STD_READ | STREAM_NOCREATE );
170 
171 	if( !xVBAProject.Is() || SVSTREAM_OK != xVBAProject->GetError() )
172 	{
173 		DBG_WARNING("Not able to find vba project, cannot find macros");
174 		return(0);
175 	}
176 	xVBAProject->SetNumberFormatInt( NUMBERFORMAT_INT_LITTLEENDIAN );
177 
178 	//*pOut << hex;
179 	sal_uInt8 header[30] =
180 	{
181 		0xcc, 0x61, 0x5e, 0x00, 0x00, 0x01, 0x00, 0xff,
182 		0x07, 0x04, 0x00, 0x00, 0x09, 0x04, 0x00, 0x00,
183 		0xe4, 0x04, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
184 		0x00, 0x00, 0x00, 0x00, 0x01, 0x00
185 	};
186 	sal_uInt8 headerin[30];
187 
188 	xVBAProject->Read(headerin,30);
189 	if (memcmp(header,headerin,30) != 0)
190 		DBG_WARNING("Warning VBA header is different, please report");
191 	sal_uInt16 value;
192 	*xVBAProject >> value;
193 	//*pOut << "Trigger value 1 is " << value << endl;
194 	sal_uInt16 svalue;
195 	*xVBAProject >> svalue;
196 	if (svalue != 0x02)
197 		DBG_WARNING("Warning VBA number is different, please report");
198 
199 	int count=0;
200 	sal_uInt8 testc=0;
201 
202 	//*pOut << "Other strings after the middle are..." << endl;
203 	//There appears to be almost any number of strings acceptable
204 	//most begin with */G , and sometimes with
205 	//*/C. Those with G always have a trailer of 12 bytes, those
206 	//with C come in pairs, the first with no trailer, and the
207 	//second with one of 12 bytes. The following code attemts
208 	//to read these strings and ends when it reaches a sequence of
209 	//bytes which fails a test to be a valid string. So this
210 	//while loop here is the particular piece of code which is
211 	//very suspect and likely to be the cause of any crashes and
212 	//problems.
213 	while ((testc = ReadPString(xVBAProject)) != 0)
214 	{
215 		//*pOut << endl;
216 		//*pOut << "testcharacter is " << testc << endl;
217 		switch (testc)
218 		{
219 			case 'C':
220 				count++;
221 				if (count == 2)
222 				{
223 					Confirm12Zeros(xVBAProject);
224 					count=0;
225 				}
226 				break;
227 			default:
228 			case 'G':
229 				Confirm12Zeros(xVBAProject);
230 				break;
231 		}
232 	}
233 
234 	//appears to be a fixed 20 byte sequence here, and then the strings
235 	//continue
236 	ConfirmFixedMiddle(xVBAProject);
237 
238 	count=0;
239 	testc=0;
240 
241 	while ((testc = ReadPString(xVBAProject)) != 0)
242 	{
243 		//*pOut << endl;
244 		//*pOut << "testcharacter is " << testc << endl;
245 		switch (testc)
246 		{
247 			case 'C':
248 				count++;
249 				if (count == 2)
250 				{
251 					Confirm12Zeros(xVBAProject);
252 					count=0;
253 				}
254 				break;
255 			default:
256 			case 'G':
257 				Confirm12Zeros(xVBAProject);
258 				break;
259 		}
260 	}
261 
262 	//there *may* be another different 20byte fixed string
263 	ConfirmFixedMiddle2(xVBAProject);
264 
265 	//*pOut << "testc is " << testc << endl;
266 	//*pOut << "position is " << xVBAProject->Tell() << endl;
267 
268 	sal_uInt16 nModules;
269 	*xVBAProject >> nModules;
270 
271 	//begin section, this section isn't really 100% correct
272 	//*pOut << nModules << hex << " vba modules" << endl;
273 	xVBAProject->SeekRel(2*nModules);
274 	xVBAProject->SeekRel(4);
275 	//*pOut << "position is " << xVBAProject->Tell() << endl;
276 	ConfirmFixedOctect(xVBAProject);
277 
278 	sal_uInt16 junksize;
279 	while(junksize != 0xFFFF)
280 	{
281 		xVBAProject->Read(&junksize,2); // usually 18 02, sometimes 1e 02
282 		//but sometimes its a run of numbers until 0xffff, gagh!!!
283 		//*pOut << "position is " << xVBAProject->Tell() << "len is "
284 		//	<< junksize << endl;
285 	}
286 
287 	sal_uInt16 ftest;
288 	*xVBAProject >> ftest;
289 	if (ftest != 0xFFFF)
290 		xVBAProject->SeekRel(ftest);
291 	*xVBAProject >> ftest;
292 	if (ftest != 0xFFFF)
293 		xVBAProject->SeekRel(ftest);
294 
295 	xVBAProject->SeekRel(100);
296 	//*pOut << "position is " << xVBAProject->Tell() << endl;
297 	//end section
298 
299 
300 	*xVBAProject >> nOffsets;
301 	pOffsets = new VBAOffset_Impl[nOffsets];
302 	int i;
303 	for (i=0;i<nOffsets;i++)
304 		{
305 		sal_uInt8 discard;
306 		sal_uInt16 len;
307 		*xVBAProject >> len;
308 		int j;
309 		for (j=0;j<len/2;j++)
310 			{
311 			*xVBAProject >> discard;
312 			pOffsets[i].sName += discard;
313 			*xVBAProject >> discard;
314 			}
315 		*xVBAProject >> len;
316 		xVBAProject->SeekRel(len);
317 
318 		//begin section, another problem area
319 		*xVBAProject >> len;
320 		if (len == 0xFFFF)
321 		{
322 			xVBAProject->SeekRel(2);
323 			*xVBAProject >> len;
324 			xVBAProject->SeekRel(len);
325 		}
326 		else
327 			xVBAProject->SeekRel(len+2);
328 		//
329 		/* I have a theory that maybe you read a 16bit len, and
330 		 * if it has 0x02 for the second byte then it is a special
331 		 * token of its own that affects nothing else, otherwise
332 		 * it is a len of the following data. C. I must test this
333 		 * theory later.
334 		 */
335 		//end section
336 
337 		xVBAProject->SeekRel(8);
338 		sal_uInt8 no_of_octects;
339 		*xVBAProject >> no_of_octects;
340 		for(j=0;j<no_of_octects;j++)
341 			xVBAProject->SeekRel(8);
342 		xVBAProject->SeekRel(6);
343 
344 		*xVBAProject >> pOffsets[i].nOffset;
345 		//*pOut << pOffsets[i].pName.GetStr() << " at 0x" << hex << pOffsets[i].nOffset << endl;
346 		xVBAProject->SeekRel(2);
347 		}
348 
349 	//*pOut << endl;
350 	return(nOffsets);
351 	}
352 
Open(const String & rToplevel,const String & rSublevel)353 sal_Bool VBA_Impl::Open( const String &rToplevel,const String &rSublevel )
354 {
355 	/* beginning test for vba stuff */
356 	sal_Bool bRet = sal_False;
357 	SvStorageRef xMacros= xStor->OpenStorage(rToplevel);
358 	if( !xMacros.Is() || SVSTREAM_OK != xMacros->GetError() )
359 	{
360 		DBG_WARNING("No Macros Storage");
361 	}
362 	else
363 	{
364 		xVBA = xMacros->OpenStorage(rSublevel);
365 		if( !xVBA.Is() || SVSTREAM_OK != xVBA->GetError() )
366 		{
367 			DBG_WARNING("No Visual Basic in Storage");
368 		}
369 		else
370 		{
371 			if (ReadVBAProject(xVBA))
372 				bRet = sal_True;
373 		}
374 	}
375 	/* end test for vba stuff */
376 	return bRet;
377 }
378 
Decompress(sal_uInt16 nIndex,int * pOverflow)379 const String &VBA_Impl::Decompress( sal_uInt16 nIndex, int *pOverflow)
380 {
381 	SvStorageStreamRef xVBAStream;
382 	sVBAString.Erase();
383 
384 	DBG_ASSERT( nIndex < nOffsets, "Index out of range" );
385 	xVBAStream = xVBA->OpenStream( pOffsets[nIndex].sName,
386 						STREAM_STD_READ | STREAM_NOCREATE );
387 	if (pOverflow)
388 		*pOverflow=0;
389 	if( !xVBAStream.Is() || SVSTREAM_OK !=
390 		xVBAStream->GetError() )
391 	{
392 		DBG_WARNING("Not able to open vb module ");
393 //		DBG_WARNING((pOffsets[nIndex].sName).GetStr());
394 	}
395 	else
396 	{
397 		xVBAStream->SetNumberFormatInt( NUMBERFORMAT_INT_LITTLEENDIAN );
398 		DecompressVBA(nIndex,xVBAStream);
399 		/*
400 		 * if len was too big for a single string set that variable ?
401 		 *	if ((len > XX) && (pOverflow))
402 				*pOverflow=1;
403 		 */
404 		if (bCommented)
405 		{
406 			String sTempStringa(String::CreateFromAscii( "\x0D\x0A"));
407 			String sTempStringb(String::CreateFromAscii( "\x0D\x0ARem "));
408 			sVBAString.SearchAndReplaceAll(sTempStringa,sTempStringb);
409 			sVBAString.InsertAscii("Rem ",0);
410 		}
411 	}
412 	return sVBAString;
413 }
414 
415 
DecompressVBA(int nIndex,SvStorageStreamRef & xVBAStream)416 int VBA_Impl::DecompressVBA( int nIndex, SvStorageStreamRef &xVBAStream )
417 {
418 	sal_uInt8 leadbyte;
419 	unsigned int pos = 0;
420 
421 	//*pOut << "jumping to " << hex << offsets[nIndex].offset << endl;
422 	xVBAStream->Seek(pOffsets[nIndex].nOffset+3);
423 
424 	int len;
425 	sal_uInt16 token;
426 	int distance, shift, clean=1;
427 
428 	while(xVBAStream->Read(&leadbyte,1))
429 		{
430 		//*pOut << "reading 8 data unit block beginning with " << leadbyte << int(leadbyte) << " at pos " << xVBAStream->Tell() << " real pos " << pos << endl;
431 		for(int position=0x01;position < 0x100;position=position<<1)
432 			{
433 			//we see if the leadbyte has flagged this location as a dataunit
434 			//which is actually a token which must be looked up in the history
435 			if (leadbyte & position)
436 				{
437 				*xVBAStream >> token;
438 
439 				if (clean == 0)
440 					clean=1;
441 
442 				//For some reason the division of the token into the length
443 				//field of the data to be inserted, and the distance back into
444 				//the history differs depending on how full the history is
445 				int pos2 = pos%WINDOWLEN;
446 				if (pos2 <= 0x10)
447 					shift = 12;
448 				else if (pos2 <= 0x20)
449 					shift = 11;
450 				else if (pos2 <= 0x40)
451 					shift = 10;
452 				else if (pos2 <= 0x80)
453 					shift = 9;
454 				else if (pos2 <= 0x100)
455 					shift = 8;
456 				else if (pos2 <= 0x200)
457 					shift = 7;
458 				else if (pos2 <= 0x400)
459 					shift = 6;
460 				else if (pos2 <= 0x800)
461 					shift = 5;
462 				else
463 					shift = 4;
464 
465 				int i;
466 				len=0;
467 				for(i=0;i<shift;i++)
468 					len |= token & (1<<i);
469 
470 				//*pOut << endl << "match lookup token " << int(token) << "len " << int(len) << endl;
471 
472 				len += 3;
473 				//*pOut << endl << "len is " << len << "shift is " << shift << endl;
474 
475 				distance = token >> shift;
476 				//*pOut << "distance token shift is " << distance << " " << int(token) << " " << shift << "pos is " << pos << " " << xVBAStream->Tell() << endl;
477 
478 				//read the len of data from the history, wrapping around the
479 				//WINDOWLEN boundary if necessary
480 				//data read from the history is also copied into the recent
481 				//part of the history as well.
482 				for (i = 0; i < len; i++)
483 					{
484 					unsigned char c;
485 					//*pOut << endl << (pos%WINDOWLEN)-distance-1 << " " << pos << " " << distance << endl;
486 					c = aHistory[(pos-distance-1)%WINDOWLEN];
487 					aHistory[pos%WINDOWLEN] = c;
488 					pos++;
489 					//*pOut << "real pos is " << pos << endl;
490 					//
491 					//temp removed
492 					//*pOut << c ;
493 					}
494 				}
495 			else
496 				{
497 				// special boundary case code, not guarantueed to be correct
498 				// seems to work though, there is something wrong with the
499 				// compression scheme (or maybe a feature) where when
500 				// the data ends on a WINDOWLEN boundary and the excess
501 				// bytes in the 8 dataunit list are discarded, and not
502 				// interpreted as tokens or normal data.
503 				if ((pos != 0) && ((pos%WINDOWLEN) == 0) && (clean))
504 					{
505 					//*pOut << "at boundary position is " << position << " " << xVBAStream->Tell() << " pos is " << pos << endl;
506 					//if (position != 0x01)
507 					//*pOut << "must restart by eating remainder single byte data units" << endl;
508 					xVBAStream->SeekRel(2);
509 					clean=0;
510 					Output(WINDOWLEN,aHistory);
511 					break;
512 					}
513 				//This is the normal case for when the data unit is not a
514 				//token to be looked up, but instead some normal data which
515 				//can be output, and placed in the history.
516 				if (xVBAStream->Read(&aHistory[pos%WINDOWLEN],1))
517 				{
518 					pos++;
519 					//temp removed
520 					//*pOut << aHistory[pos++%WINDOWLEN];
521 				}
522 				if (clean == 0)
523 					clean=1;
524 				//*pOut << "pos is " << pos << " " << xVBAStream->Tell() << endl;
525 				}
526 			}
527 		}
528 	if (pos%WINDOWLEN)
529 		Output(pos%WINDOWLEN,aHistory);
530 	return(pos);
531 }
532 
533