xref: /aoo42x/main/ucb/source/regexp/regexp.cxx (revision cdf0e10c)
1*cdf0e10cSrcweir /*************************************************************************
2*cdf0e10cSrcweir  *
3*cdf0e10cSrcweir  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4*cdf0e10cSrcweir  *
5*cdf0e10cSrcweir  * Copyright 2000, 2010 Oracle and/or its affiliates.
6*cdf0e10cSrcweir  *
7*cdf0e10cSrcweir  * OpenOffice.org - a multi-platform office productivity suite
8*cdf0e10cSrcweir  *
9*cdf0e10cSrcweir  * This file is part of OpenOffice.org.
10*cdf0e10cSrcweir  *
11*cdf0e10cSrcweir  * OpenOffice.org is free software: you can redistribute it and/or modify
12*cdf0e10cSrcweir  * it under the terms of the GNU Lesser General Public License version 3
13*cdf0e10cSrcweir  * only, as published by the Free Software Foundation.
14*cdf0e10cSrcweir  *
15*cdf0e10cSrcweir  * OpenOffice.org is distributed in the hope that it will be useful,
16*cdf0e10cSrcweir  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17*cdf0e10cSrcweir  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18*cdf0e10cSrcweir  * GNU Lesser General Public License version 3 for more details
19*cdf0e10cSrcweir  * (a copy is included in the LICENSE file that accompanied this code).
20*cdf0e10cSrcweir  *
21*cdf0e10cSrcweir  * You should have received a copy of the GNU Lesser General Public License
22*cdf0e10cSrcweir  * version 3 along with OpenOffice.org.  If not, see
23*cdf0e10cSrcweir  * <http://www.openoffice.org/license.html>
24*cdf0e10cSrcweir  * for a copy of the LGPLv3 License.
25*cdf0e10cSrcweir  *
26*cdf0e10cSrcweir  ************************************************************************/
27*cdf0e10cSrcweir 
28*cdf0e10cSrcweir // MARKER(update_precomp.py): autogen include statement, do not remove
29*cdf0e10cSrcweir #include "precompiled_ucb.hxx"
30*cdf0e10cSrcweir #include <regexp.hxx>
31*cdf0e10cSrcweir 
32*cdf0e10cSrcweir #include <cstddef>
33*cdf0e10cSrcweir 
34*cdf0e10cSrcweir #include "osl/diagnose.h"
35*cdf0e10cSrcweir #include <com/sun/star/lang/IllegalArgumentException.hpp>
36*cdf0e10cSrcweir #include <rtl/ustrbuf.hxx>
37*cdf0e10cSrcweir #include <rtl/ustring.hxx>
38*cdf0e10cSrcweir 
39*cdf0e10cSrcweir namespace unnamed_ucb_regexp {} using namespace unnamed_ucb_regexp;
40*cdf0e10cSrcweir 	// unnamed namespaces don't work well yet...
41*cdf0e10cSrcweir 
42*cdf0e10cSrcweir using namespace com::sun::star;
43*cdf0e10cSrcweir using namespace ucb_impl;
44*cdf0e10cSrcweir 
45*cdf0e10cSrcweir //============================================================================
46*cdf0e10cSrcweir //
47*cdf0e10cSrcweir //  Regexp
48*cdf0e10cSrcweir //
49*cdf0e10cSrcweir //============================================================================
50*cdf0e10cSrcweir 
51*cdf0e10cSrcweir inline Regexp::Regexp(Kind eTheKind, rtl::OUString const & rThePrefix,
52*cdf0e10cSrcweir 					  bool bTheEmptyDomain, rtl::OUString const & rTheInfix,
53*cdf0e10cSrcweir 					  bool bTheTranslation,
54*cdf0e10cSrcweir 					  rtl::OUString const & rTheReversePrefix):
55*cdf0e10cSrcweir 	m_eKind(eTheKind),
56*cdf0e10cSrcweir 	m_aPrefix(rThePrefix),
57*cdf0e10cSrcweir 	m_aInfix(rTheInfix),
58*cdf0e10cSrcweir 	m_aReversePrefix(rTheReversePrefix),
59*cdf0e10cSrcweir 	m_bEmptyDomain(bTheEmptyDomain),
60*cdf0e10cSrcweir 	m_bTranslation(bTheTranslation)
61*cdf0e10cSrcweir {
62*cdf0e10cSrcweir 	OSL_ASSERT(m_eKind == KIND_DOMAIN
63*cdf0e10cSrcweir 			   || !m_bEmptyDomain && m_aInfix.getLength() == 0);
64*cdf0e10cSrcweir 	OSL_ASSERT(m_bTranslation || m_aReversePrefix.getLength() == 0);
65*cdf0e10cSrcweir }
66*cdf0e10cSrcweir 
67*cdf0e10cSrcweir //============================================================================
68*cdf0e10cSrcweir namespace unnamed_ucb_regexp {
69*cdf0e10cSrcweir 
70*cdf0e10cSrcweir bool matchStringIgnoreCase(sal_Unicode const ** pBegin,
71*cdf0e10cSrcweir 						   sal_Unicode const * pEnd,
72*cdf0e10cSrcweir 						   rtl::OUString const & rString)
73*cdf0e10cSrcweir {
74*cdf0e10cSrcweir 	sal_Unicode const * p = *pBegin;
75*cdf0e10cSrcweir 
76*cdf0e10cSrcweir 	sal_Unicode const * q = rString.getStr();
77*cdf0e10cSrcweir 	sal_Unicode const * qEnd = q + rString.getLength();
78*cdf0e10cSrcweir 
79*cdf0e10cSrcweir 	if (pEnd - p < qEnd - q)
80*cdf0e10cSrcweir 		return false;
81*cdf0e10cSrcweir 
82*cdf0e10cSrcweir 	while (q != qEnd)
83*cdf0e10cSrcweir 	{
84*cdf0e10cSrcweir 		sal_Unicode c1 = *p++;
85*cdf0e10cSrcweir 		sal_Unicode c2 = *q++;
86*cdf0e10cSrcweir 		if (c1 >= 'a' && c1 <= 'z')
87*cdf0e10cSrcweir 			c1 -= 'a' - 'A';
88*cdf0e10cSrcweir 		if (c2 >= 'a' && c2 <= 'z')
89*cdf0e10cSrcweir 			c2 -= 'a' - 'A';
90*cdf0e10cSrcweir 		if (c1 != c2)
91*cdf0e10cSrcweir 			return false;
92*cdf0e10cSrcweir 	}
93*cdf0e10cSrcweir 
94*cdf0e10cSrcweir 	*pBegin = p;
95*cdf0e10cSrcweir 	return true;
96*cdf0e10cSrcweir }
97*cdf0e10cSrcweir 
98*cdf0e10cSrcweir }
99*cdf0e10cSrcweir 
100*cdf0e10cSrcweir bool Regexp::matches(rtl::OUString const & rString,
101*cdf0e10cSrcweir 					 rtl::OUString * pTranslation, bool * pTranslated) const
102*cdf0e10cSrcweir {
103*cdf0e10cSrcweir 	sal_Unicode const * pBegin = rString.getStr();
104*cdf0e10cSrcweir 	sal_Unicode const * pEnd = pBegin + rString.getLength();
105*cdf0e10cSrcweir 
106*cdf0e10cSrcweir 	bool bMatches = false;
107*cdf0e10cSrcweir 
108*cdf0e10cSrcweir 	sal_Unicode const * p = pBegin;
109*cdf0e10cSrcweir 	if (matchStringIgnoreCase(&p, pEnd, m_aPrefix))
110*cdf0e10cSrcweir 	{
111*cdf0e10cSrcweir 		sal_Unicode const * pBlock1Begin = p;
112*cdf0e10cSrcweir 		sal_Unicode const * pBlock1End = pEnd;
113*cdf0e10cSrcweir 
114*cdf0e10cSrcweir 		sal_Unicode const * pBlock2Begin = 0;
115*cdf0e10cSrcweir 		sal_Unicode const * pBlock2End = 0;
116*cdf0e10cSrcweir 
117*cdf0e10cSrcweir 		switch (m_eKind)
118*cdf0e10cSrcweir 		{
119*cdf0e10cSrcweir 			case KIND_PREFIX:
120*cdf0e10cSrcweir 				bMatches = true;
121*cdf0e10cSrcweir 				break;
122*cdf0e10cSrcweir 
123*cdf0e10cSrcweir 			case KIND_AUTHORITY:
124*cdf0e10cSrcweir 				bMatches = p == pEnd || *p == '/' || *p == '?' || *p == '#';
125*cdf0e10cSrcweir 				break;
126*cdf0e10cSrcweir 
127*cdf0e10cSrcweir 			case KIND_DOMAIN:
128*cdf0e10cSrcweir 				if (!m_bEmptyDomain)
129*cdf0e10cSrcweir 				{
130*cdf0e10cSrcweir 					if (p == pEnd || *p == '/' || *p == '?' || *p == '#')
131*cdf0e10cSrcweir 						break;
132*cdf0e10cSrcweir 					++p;
133*cdf0e10cSrcweir 				}
134*cdf0e10cSrcweir 				for (;;)
135*cdf0e10cSrcweir 				{
136*cdf0e10cSrcweir 					sal_Unicode const * q = p;
137*cdf0e10cSrcweir 					if (matchStringIgnoreCase(&q, pEnd, m_aInfix)
138*cdf0e10cSrcweir 						&& (q == pEnd || *q == '/' || *q == '?' || *q == '#'))
139*cdf0e10cSrcweir 					{
140*cdf0e10cSrcweir 						bMatches = true;
141*cdf0e10cSrcweir 						pBlock1End = p;
142*cdf0e10cSrcweir 						pBlock2Begin = q;
143*cdf0e10cSrcweir 						pBlock2End = pEnd;
144*cdf0e10cSrcweir 						break;
145*cdf0e10cSrcweir 					}
146*cdf0e10cSrcweir 
147*cdf0e10cSrcweir 					if (p == pEnd)
148*cdf0e10cSrcweir 						break;
149*cdf0e10cSrcweir 
150*cdf0e10cSrcweir 					sal_Unicode c = *p++;
151*cdf0e10cSrcweir 					if (c == '/' || c == '?' || c == '#')
152*cdf0e10cSrcweir 						break;
153*cdf0e10cSrcweir 				}
154*cdf0e10cSrcweir 				break;
155*cdf0e10cSrcweir 		}
156*cdf0e10cSrcweir 
157*cdf0e10cSrcweir 		if (bMatches)
158*cdf0e10cSrcweir 		{
159*cdf0e10cSrcweir 			if (m_bTranslation)
160*cdf0e10cSrcweir 			{
161*cdf0e10cSrcweir 				if (pTranslation)
162*cdf0e10cSrcweir 				{
163*cdf0e10cSrcweir 					rtl::OUStringBuffer aBuffer(m_aReversePrefix);
164*cdf0e10cSrcweir 					aBuffer.append(pBlock1Begin, pBlock1End - pBlock1Begin);
165*cdf0e10cSrcweir 					aBuffer.append(m_aInfix);
166*cdf0e10cSrcweir 					aBuffer.append(pBlock2Begin, pBlock2End - pBlock2Begin);
167*cdf0e10cSrcweir 					*pTranslation = aBuffer.makeStringAndClear();
168*cdf0e10cSrcweir 				}
169*cdf0e10cSrcweir 				if (pTranslated)
170*cdf0e10cSrcweir 					*pTranslated = true;
171*cdf0e10cSrcweir 			}
172*cdf0e10cSrcweir 			else
173*cdf0e10cSrcweir 			{
174*cdf0e10cSrcweir 				if (pTranslation)
175*cdf0e10cSrcweir 					*pTranslation = rString;
176*cdf0e10cSrcweir 				if (pTranslated)
177*cdf0e10cSrcweir 					*pTranslated = false;
178*cdf0e10cSrcweir 			}
179*cdf0e10cSrcweir 		}
180*cdf0e10cSrcweir 	}
181*cdf0e10cSrcweir 
182*cdf0e10cSrcweir 	return bMatches;
183*cdf0e10cSrcweir }
184*cdf0e10cSrcweir 
185*cdf0e10cSrcweir //============================================================================
186*cdf0e10cSrcweir namespace unnamed_ucb_regexp {
187*cdf0e10cSrcweir 
188*cdf0e10cSrcweir inline bool isAlpha(sal_Unicode c)
189*cdf0e10cSrcweir {
190*cdf0e10cSrcweir 	return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
191*cdf0e10cSrcweir }
192*cdf0e10cSrcweir 
193*cdf0e10cSrcweir inline bool isDigit(sal_Unicode c)
194*cdf0e10cSrcweir {
195*cdf0e10cSrcweir 	return c >= '0' && c <= '9';
196*cdf0e10cSrcweir }
197*cdf0e10cSrcweir 
198*cdf0e10cSrcweir bool isScheme(rtl::OUString const & rString, bool bColon)
199*cdf0e10cSrcweir {
200*cdf0e10cSrcweir 	// Return true if rString matches <scheme> (plus a trailing ":" if bColon
201*cdf0e10cSrcweir     // is true) from RFC 2396:
202*cdf0e10cSrcweir 	sal_Unicode const * p = rString.getStr();
203*cdf0e10cSrcweir 	sal_Unicode const * pEnd = p + rString.getLength();
204*cdf0e10cSrcweir 	if (p != pEnd && isAlpha(*p))
205*cdf0e10cSrcweir 		for (++p;;)
206*cdf0e10cSrcweir 		{
207*cdf0e10cSrcweir 			if (p == pEnd)
208*cdf0e10cSrcweir 				return !bColon;
209*cdf0e10cSrcweir 			sal_Unicode c = *p++;
210*cdf0e10cSrcweir 			if (!(isAlpha(c) || isDigit(c)
211*cdf0e10cSrcweir                   || c == '+' || c == '-' || c == '.'))
212*cdf0e10cSrcweir                 return bColon && c == ':' && p == pEnd;
213*cdf0e10cSrcweir 		}
214*cdf0e10cSrcweir 	return false;
215*cdf0e10cSrcweir }
216*cdf0e10cSrcweir 
217*cdf0e10cSrcweir void appendStringLiteral(rtl::OUStringBuffer * pBuffer,
218*cdf0e10cSrcweir 						 rtl::OUString const & rString)
219*cdf0e10cSrcweir {
220*cdf0e10cSrcweir 	OSL_ASSERT(pBuffer);
221*cdf0e10cSrcweir 
222*cdf0e10cSrcweir 	pBuffer->append(sal_Unicode('"'));
223*cdf0e10cSrcweir 	sal_Unicode const * p = rString.getStr();
224*cdf0e10cSrcweir 	sal_Unicode const * pEnd = p + rString.getLength();
225*cdf0e10cSrcweir 	while (p != pEnd)
226*cdf0e10cSrcweir 	{
227*cdf0e10cSrcweir 		sal_Unicode c = *p++;
228*cdf0e10cSrcweir 		if (c == '"' || c == '\\')
229*cdf0e10cSrcweir 			pBuffer->append(sal_Unicode('\\'));
230*cdf0e10cSrcweir 		pBuffer->append(c);
231*cdf0e10cSrcweir 	}
232*cdf0e10cSrcweir 	pBuffer->append(sal_Unicode('"'));
233*cdf0e10cSrcweir }
234*cdf0e10cSrcweir 
235*cdf0e10cSrcweir }
236*cdf0e10cSrcweir 
237*cdf0e10cSrcweir rtl::OUString Regexp::getRegexp(bool bReverse) const
238*cdf0e10cSrcweir {
239*cdf0e10cSrcweir 	if (m_bTranslation)
240*cdf0e10cSrcweir 	{
241*cdf0e10cSrcweir 		rtl::OUStringBuffer aBuffer;
242*cdf0e10cSrcweir 		if (bReverse)
243*cdf0e10cSrcweir 		{
244*cdf0e10cSrcweir 			if (m_aReversePrefix.getLength() != 0)
245*cdf0e10cSrcweir 				appendStringLiteral(&aBuffer, m_aReversePrefix);
246*cdf0e10cSrcweir 		}
247*cdf0e10cSrcweir 		else
248*cdf0e10cSrcweir 		{
249*cdf0e10cSrcweir 			if (m_aPrefix.getLength() != 0)
250*cdf0e10cSrcweir 				appendStringLiteral(&aBuffer, m_aPrefix);
251*cdf0e10cSrcweir 		}
252*cdf0e10cSrcweir 		switch (m_eKind)
253*cdf0e10cSrcweir 		{
254*cdf0e10cSrcweir 			case KIND_PREFIX:
255*cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("(.*)"));
256*cdf0e10cSrcweir 				break;
257*cdf0e10cSrcweir 
258*cdf0e10cSrcweir 			case KIND_AUTHORITY:
259*cdf0e10cSrcweir 				aBuffer.
260*cdf0e10cSrcweir 					appendAscii(RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)"));
261*cdf0e10cSrcweir 				break;
262*cdf0e10cSrcweir 
263*cdf0e10cSrcweir 			case KIND_DOMAIN:
264*cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([^/?#]"));
265*cdf0e10cSrcweir 				aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
266*cdf0e10cSrcweir 				if (m_aInfix.getLength() != 0)
267*cdf0e10cSrcweir 					appendStringLiteral(&aBuffer, m_aInfix);
268*cdf0e10cSrcweir 				aBuffer.
269*cdf0e10cSrcweir 					appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?)"));
270*cdf0e10cSrcweir 				break;
271*cdf0e10cSrcweir 		}
272*cdf0e10cSrcweir 		aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("->"));
273*cdf0e10cSrcweir 		if (bReverse)
274*cdf0e10cSrcweir 		{
275*cdf0e10cSrcweir 			if (m_aPrefix.getLength() != 0)
276*cdf0e10cSrcweir 				appendStringLiteral(&aBuffer, m_aPrefix);
277*cdf0e10cSrcweir 		}
278*cdf0e10cSrcweir 		else
279*cdf0e10cSrcweir 		{
280*cdf0e10cSrcweir 			if (m_aReversePrefix.getLength() != 0)
281*cdf0e10cSrcweir 				appendStringLiteral(&aBuffer, m_aReversePrefix);
282*cdf0e10cSrcweir 		}
283*cdf0e10cSrcweir 		aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("\\1"));
284*cdf0e10cSrcweir 		return aBuffer.makeStringAndClear();
285*cdf0e10cSrcweir 	}
286*cdf0e10cSrcweir 	else if (m_eKind == KIND_PREFIX && isScheme(m_aPrefix, true))
287*cdf0e10cSrcweir 		return m_aPrefix.copy(0, m_aPrefix.getLength() - 1);
288*cdf0e10cSrcweir 	else
289*cdf0e10cSrcweir 	{
290*cdf0e10cSrcweir 		rtl::OUStringBuffer aBuffer;
291*cdf0e10cSrcweir 		if (m_aPrefix.getLength() != 0)
292*cdf0e10cSrcweir 			appendStringLiteral(&aBuffer, m_aPrefix);
293*cdf0e10cSrcweir 		switch (m_eKind)
294*cdf0e10cSrcweir 		{
295*cdf0e10cSrcweir 			case KIND_PREFIX:
296*cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM(".*"));
297*cdf0e10cSrcweir 				break;
298*cdf0e10cSrcweir 
299*cdf0e10cSrcweir 			case KIND_AUTHORITY:
300*cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
301*cdf0e10cSrcweir 				break;
302*cdf0e10cSrcweir 
303*cdf0e10cSrcweir 			case KIND_DOMAIN:
304*cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("[^/?#]"));
305*cdf0e10cSrcweir 				aBuffer.append(sal_Unicode(m_bEmptyDomain ? '*' : '+'));
306*cdf0e10cSrcweir 				if (m_aInfix.getLength() != 0)
307*cdf0e10cSrcweir 					appendStringLiteral(&aBuffer, m_aInfix);
308*cdf0e10cSrcweir 				aBuffer.appendAscii(RTL_CONSTASCII_STRINGPARAM("([/?#].*)?"));
309*cdf0e10cSrcweir 				break;
310*cdf0e10cSrcweir 		}
311*cdf0e10cSrcweir 		return aBuffer.makeStringAndClear();
312*cdf0e10cSrcweir 	}
313*cdf0e10cSrcweir }
314*cdf0e10cSrcweir 
315*cdf0e10cSrcweir //============================================================================
316*cdf0e10cSrcweir namespace unnamed_ucb_regexp {
317*cdf0e10cSrcweir 
318*cdf0e10cSrcweir bool matchString(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
319*cdf0e10cSrcweir 				 sal_Char const * pString, size_t nStringLength)
320*cdf0e10cSrcweir {
321*cdf0e10cSrcweir 	sal_Unicode const * p = *pBegin;
322*cdf0e10cSrcweir 
323*cdf0e10cSrcweir 	sal_uChar const * q = reinterpret_cast< sal_uChar const * >(pString);
324*cdf0e10cSrcweir 	sal_uChar const * qEnd = q + nStringLength;
325*cdf0e10cSrcweir 
326*cdf0e10cSrcweir 	if (pEnd - p < qEnd - q)
327*cdf0e10cSrcweir 		return false;
328*cdf0e10cSrcweir 
329*cdf0e10cSrcweir 	while (q != qEnd)
330*cdf0e10cSrcweir 	{
331*cdf0e10cSrcweir 		sal_Unicode c1 = *p++;
332*cdf0e10cSrcweir 		sal_Unicode c2 = *q++;
333*cdf0e10cSrcweir 		if (c1 != c2)
334*cdf0e10cSrcweir 			return false;
335*cdf0e10cSrcweir 	}
336*cdf0e10cSrcweir 
337*cdf0e10cSrcweir 	*pBegin = p;
338*cdf0e10cSrcweir 	return true;
339*cdf0e10cSrcweir }
340*cdf0e10cSrcweir 
341*cdf0e10cSrcweir bool scanStringLiteral(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
342*cdf0e10cSrcweir 					   rtl::OUString * pString)
343*cdf0e10cSrcweir {
344*cdf0e10cSrcweir 	sal_Unicode const * p = *pBegin;
345*cdf0e10cSrcweir 
346*cdf0e10cSrcweir 	if (p == pEnd || *p++ != '"')
347*cdf0e10cSrcweir 		return false;
348*cdf0e10cSrcweir 
349*cdf0e10cSrcweir 	rtl::OUStringBuffer aBuffer;
350*cdf0e10cSrcweir 	for (;;)
351*cdf0e10cSrcweir 	{
352*cdf0e10cSrcweir 		if (p == pEnd)
353*cdf0e10cSrcweir 			return false;
354*cdf0e10cSrcweir 		sal_Unicode c = *p++;
355*cdf0e10cSrcweir 		if (c == '"')
356*cdf0e10cSrcweir 			break;
357*cdf0e10cSrcweir 		if (c == '\\')
358*cdf0e10cSrcweir 		{
359*cdf0e10cSrcweir 			if (p == pEnd)
360*cdf0e10cSrcweir 				return false;
361*cdf0e10cSrcweir 			c = *p++;
362*cdf0e10cSrcweir 			if (c != '"' && c != '\\')
363*cdf0e10cSrcweir 				return false;
364*cdf0e10cSrcweir 		}
365*cdf0e10cSrcweir 		aBuffer.append(c);
366*cdf0e10cSrcweir 	}
367*cdf0e10cSrcweir 
368*cdf0e10cSrcweir 	*pBegin = p;
369*cdf0e10cSrcweir 	*pString = aBuffer.makeStringAndClear();
370*cdf0e10cSrcweir 	return true;
371*cdf0e10cSrcweir }
372*cdf0e10cSrcweir 
373*cdf0e10cSrcweir }
374*cdf0e10cSrcweir 
375*cdf0e10cSrcweir Regexp Regexp::parse(rtl::OUString const & rRegexp)
376*cdf0e10cSrcweir {
377*cdf0e10cSrcweir 	// Detect an input of '<scheme>' as an abbreviation of '"<scheme>:".*'
378*cdf0e10cSrcweir 	// where <scheme> is as defined in RFC 2396:
379*cdf0e10cSrcweir 	if (isScheme(rRegexp, false))
380*cdf0e10cSrcweir 		return Regexp(Regexp::KIND_PREFIX,
381*cdf0e10cSrcweir                       rRegexp
382*cdf0e10cSrcweir                           + rtl::OUString(RTL_CONSTASCII_USTRINGPARAM(":")),
383*cdf0e10cSrcweir                       false,
384*cdf0e10cSrcweir                       rtl::OUString(),
385*cdf0e10cSrcweir 					  false,
386*cdf0e10cSrcweir                       rtl::OUString());
387*cdf0e10cSrcweir 
388*cdf0e10cSrcweir 	sal_Unicode const * p = rRegexp.getStr();
389*cdf0e10cSrcweir 	sal_Unicode const * pEnd = p + rRegexp.getLength();
390*cdf0e10cSrcweir 
391*cdf0e10cSrcweir 	rtl::OUString aPrefix;
392*cdf0e10cSrcweir 	scanStringLiteral(&p, pEnd, &aPrefix);
393*cdf0e10cSrcweir 
394*cdf0e10cSrcweir 	if (p == pEnd)
395*cdf0e10cSrcweir 		throw lang::IllegalArgumentException();
396*cdf0e10cSrcweir 
397*cdf0e10cSrcweir 	if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(".*")))
398*cdf0e10cSrcweir 	{
399*cdf0e10cSrcweir 		if (p != pEnd)
400*cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
401*cdf0e10cSrcweir 
402*cdf0e10cSrcweir 		return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
403*cdf0e10cSrcweir 					  false, rtl::OUString());
404*cdf0e10cSrcweir 	}
405*cdf0e10cSrcweir 	else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("(.*)->")))
406*cdf0e10cSrcweir 	{
407*cdf0e10cSrcweir 		rtl::OUString aReversePrefix;
408*cdf0e10cSrcweir 		scanStringLiteral(&p, pEnd, &aReversePrefix);
409*cdf0e10cSrcweir 
410*cdf0e10cSrcweir 		if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
411*cdf0e10cSrcweir 			|| p != pEnd)
412*cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
413*cdf0e10cSrcweir 
414*cdf0e10cSrcweir 		return Regexp(Regexp::KIND_PREFIX, aPrefix, false, rtl::OUString(),
415*cdf0e10cSrcweir 					  true, aReversePrefix);
416*cdf0e10cSrcweir 	}
417*cdf0e10cSrcweir 	else if (matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
418*cdf0e10cSrcweir 	{
419*cdf0e10cSrcweir 		if (p != pEnd)
420*cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
421*cdf0e10cSrcweir 
422*cdf0e10cSrcweir 		return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
423*cdf0e10cSrcweir 					  false, rtl::OUString());
424*cdf0e10cSrcweir 	}
425*cdf0e10cSrcweir 	else if (matchString(&p, pEnd,
426*cdf0e10cSrcweir 						 RTL_CONSTASCII_STRINGPARAM("(([/?#].*)?)->")))
427*cdf0e10cSrcweir 	{
428*cdf0e10cSrcweir 		rtl::OUString aReversePrefix;
429*cdf0e10cSrcweir 		if (!(scanStringLiteral(&p, pEnd, &aReversePrefix)
430*cdf0e10cSrcweir 			  && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))
431*cdf0e10cSrcweir 			  && p == pEnd))
432*cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
433*cdf0e10cSrcweir 
434*cdf0e10cSrcweir 		return Regexp(Regexp::KIND_AUTHORITY, aPrefix, false, rtl::OUString(),
435*cdf0e10cSrcweir 					  true, aReversePrefix);
436*cdf0e10cSrcweir 	}
437*cdf0e10cSrcweir 	else
438*cdf0e10cSrcweir 	{
439*cdf0e10cSrcweir 		bool bOpen = false;
440*cdf0e10cSrcweir 		if (p != pEnd && *p == '(')
441*cdf0e10cSrcweir 		{
442*cdf0e10cSrcweir 			++p;
443*cdf0e10cSrcweir 			bOpen = true;
444*cdf0e10cSrcweir 		}
445*cdf0e10cSrcweir 
446*cdf0e10cSrcweir 		if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("[^/?#]")))
447*cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
448*cdf0e10cSrcweir 
449*cdf0e10cSrcweir 		if (p == pEnd || (*p != '*' && *p != '+'))
450*cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
451*cdf0e10cSrcweir 		bool bEmptyDomain = *p++ == '*';
452*cdf0e10cSrcweir 
453*cdf0e10cSrcweir 		rtl::OUString aInfix;
454*cdf0e10cSrcweir 		scanStringLiteral(&p, pEnd, &aInfix);
455*cdf0e10cSrcweir 
456*cdf0e10cSrcweir 		if (!matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("([/?#].*)?")))
457*cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
458*cdf0e10cSrcweir 
459*cdf0e10cSrcweir 		rtl::OUString aReversePrefix;
460*cdf0e10cSrcweir 		if (bOpen
461*cdf0e10cSrcweir 			&& !(matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM(")->"))
462*cdf0e10cSrcweir 				 && scanStringLiteral(&p, pEnd, &aReversePrefix)
463*cdf0e10cSrcweir 				 && matchString(&p, pEnd, RTL_CONSTASCII_STRINGPARAM("\\1"))))
464*cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
465*cdf0e10cSrcweir 
466*cdf0e10cSrcweir 		if (p != pEnd)
467*cdf0e10cSrcweir 			throw lang::IllegalArgumentException();
468*cdf0e10cSrcweir 
469*cdf0e10cSrcweir 		return Regexp(Regexp::KIND_DOMAIN, aPrefix, bEmptyDomain, aInfix,
470*cdf0e10cSrcweir 					  bOpen, aReversePrefix);
471*cdf0e10cSrcweir 	}
472*cdf0e10cSrcweir }
473*cdf0e10cSrcweir 
474