1 /************************************************************************* 2 * 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * Copyright 2000, 2010 Oracle and/or its affiliates. 6 * 7 * OpenOffice.org - a multi-platform office productivity suite 8 * 9 * This file is part of OpenOffice.org. 10 * 11 * OpenOffice.org is free software: you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser General Public License version 3 13 * only, as published by the Free Software Foundation. 14 * 15 * OpenOffice.org is distributed in the hope that it will be useful, 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 18 * GNU Lesser General Public License version 3 for more details 19 * (a copy is included in the LICENSE file that accompanied this code). 20 * 21 * You should have received a copy of the GNU Lesser General Public License 22 * version 3 along with OpenOffice.org. If not, see 23 * <http://www.openoffice.org/license.html> 24 * for a copy of the LGPLv3 License. 25 * 26 ************************************************************************/ 27 28 #include "unichars.h" 29 #include "osl/diagnose.h" 30 #include "sal/types.h" 31 32 int ImplIsNoncharacter(sal_uInt32 nUtf32) 33 { 34 /* All code points that are noncharacters, as of Unicode 3.1.1. */ 35 return (nUtf32 >= 0xFDD0 && nUtf32 <= 0xFDEF) 36 || (nUtf32 & 0xFFFF) >= 0xFFFE 37 || nUtf32 > 0x10FFFF; 38 } 39 40 int ImplIsControlOrFormat(sal_uInt32 nUtf32) 41 { 42 /* All code points of <http://www.unicode.org/Public/UNIDATA/ 43 UnicodeData.txt>, Version 3.1.1, that have a General Category of Cc 44 (Other, Control) or Cf (Other, Format). 45 */ 46 return nUtf32 <= 0x001F 47 || (nUtf32 >= 0x007F && nUtf32 <= 0x009F) 48 || nUtf32 == 0x070F /* SYRIAC ABBREVIATION MARK */ 49 || nUtf32 == 0x180B /* MONGOLIAN FREE VARIATION SELECTOR ONE */ 50 || nUtf32 == 0x180C /* MONGOLIAN FREE VARIATION SELECTOR TWO */ 51 || nUtf32 == 0x180D /* MONGOLIAN FREE VARIATION SELECTOR THREE */ 52 || nUtf32 == 0x180E /* MONGOLIAN VOWEL SEPARATOR */ 53 || nUtf32 == 0x200C /* ZERO WIDTH NON-JOINER */ 54 || nUtf32 == 0x200D /* ZERO WIDTH JOINER */ 55 || nUtf32 == 0x200E /* LEFT-TO-RIGHT MARK */ 56 || nUtf32 == 0x200F /* RIGHT-TO-LEFT MARK */ 57 || nUtf32 == 0x202A /* LEFT-TO-RIGHT EMBEDDING */ 58 || nUtf32 == 0x202B /* RIGHT-TO-LEFT EMBEDDING */ 59 || nUtf32 == 0x202C /* POP DIRECTIONAL FORMATTING */ 60 || nUtf32 == 0x202D /* LEFT-TO-RIGHT OVERRIDE */ 61 || nUtf32 == 0x202E /* RIGHT-TO-LEFT OVERRIDE */ 62 || nUtf32 == 0x206A /* INHIBIT SYMMETRIC SWAPPING */ 63 || nUtf32 == 0x206B /* ACTIVATE SYMMETRIC SWAPPING */ 64 || nUtf32 == 0x206C /* INHIBIT ARABIC FORM SHAPING */ 65 || nUtf32 == 0x206D /* ACTIVATE ARABIC FORM SHAPING */ 66 || nUtf32 == 0x206E /* NATIONAL DIGIT SHAPES */ 67 || nUtf32 == 0x206F /* NOMINAL DIGIT SHAPES */ 68 || nUtf32 == 0xFEFF /* ZERO WIDTH NO-BREAK SPACE */ 69 || nUtf32 == 0xFFF9 /* INTERLINEAR ANNOTATION ANCHOR */ 70 || nUtf32 == 0xFFFA /* INTERLINEAR ANNOTATION SEPARATOR */ 71 || nUtf32 == 0xFFFB /* INTERLINEAR ANNOTATION TERMINATOR */ 72 || nUtf32 == 0x1D173 /* MUSICAL SYMBOL BEGIN BEAM */ 73 || nUtf32 == 0x1D174 /* MUSICAL SYMBOL END BEAM */ 74 || nUtf32 == 0x1D175 /* MUSICAL SYMBOL BEGIN TIE */ 75 || nUtf32 == 0x1D176 /* MUSICAL SYMBOL END TIE */ 76 || nUtf32 == 0x1D177 /* MUSICAL SYMBOL BEGIN SLUR */ 77 || nUtf32 == 0x1D178 /* MUSICAL SYMBOL END SLUR */ 78 || nUtf32 == 0x1D179 /* MUSICAL SYMBOL BEGIN PHRASE */ 79 || nUtf32 == 0x1D17A /* MUSICAL SYMBOL END PHRASE */ 80 || nUtf32 == 0xE0001 /* LANGUAGE TAG */ 81 || (nUtf32 >= 0xE0020 && nUtf32 <= 0xE007F); 82 } 83 84 int ImplIsHighSurrogate(sal_uInt32 nUtf32) 85 { 86 /* All code points that are high-surrogates, as of Unicode 3.1.1. */ 87 return nUtf32 >= 0xD800 && nUtf32 <= 0xDBFF; 88 } 89 90 int ImplIsLowSurrogate(sal_uInt32 nUtf32) 91 { 92 /* All code points that are low-surrogates, as of Unicode 3.1.1. */ 93 return nUtf32 >= 0xDC00 && nUtf32 <= 0xDFFF; 94 } 95 96 int ImplIsPrivateUse(sal_uInt32 nUtf32) 97 { 98 /* All code points of <http://www.unicode.org/Public/UNIDATA/ 99 UnicodeData.txt>, Version 3.1.1, that have a General Category of Co 100 (Other, Private Use). 101 */ 102 return (nUtf32 >= 0xE000 && nUtf32 <= 0xF8FF) 103 || (nUtf32 >= 0xF0000 && nUtf32 <= 0xFFFFD) 104 || (nUtf32 >= 0x100000 && nUtf32 <= 0x10FFFD); 105 } 106 107 int ImplIsZeroWidth(sal_uInt32 nUtf32) 108 { 109 /* All code points of <http://www.unicode.org/Public/UNIDATA/ 110 UnicodeData.txt>, Version 3.1.1, that have "ZERO WIDTH" in their 111 Character name. 112 */ 113 return nUtf32 == 0x200B /* ZERO WIDTH SPACE */ 114 || nUtf32 == 0x200C /* ZERO WIDTH NON-JOINER */ 115 || nUtf32 == 0x200D /* ZERO WIDTH JOINER */ 116 || nUtf32 == 0xFEFF; /* ZEOR WIDTH NO-BREAK SPACE */ 117 } 118 119 sal_uInt32 ImplGetHighSurrogate(sal_uInt32 nUtf32) 120 { 121 OSL_ENSURE(nUtf32 >= 0x10000, "specification violation"); 122 return ((nUtf32 - 0x10000) >> 10) | 0xD800; 123 } 124 125 sal_uInt32 ImplGetLowSurrogate(sal_uInt32 nUtf32) 126 { 127 OSL_ENSURE(nUtf32 >= 0x10000, "specification violation"); 128 return ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00; 129 } 130 131 sal_uInt32 ImplCombineSurrogates(sal_uInt32 nHigh, sal_uInt32 nLow) 132 { 133 OSL_ENSURE(ImplIsHighSurrogate(nHigh) && ImplIsLowSurrogate(nLow), 134 "specification violation"); 135 return (((nHigh & 0x3FF) << 10) | (nLow & 0x3FF)) + 0x10000; 136 } 137