/**************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 *************************************************************/



/*
 * Implements osl_[increment|decrement]InterlockedCount in two ways:
 * sparcv8 architecture:                use the "swap" instruction
 * sparcv9/sparcv8plus architecture:    use the "cas"  instruction
 *
 * 32 bit mode with v8 and v8plus support:
 * Initialize once with osl_InterlockedCountSetV9(int bv9) if you want to
 * use the "cas" instruction, which is faster (no spinlock needed).
 * Default is to use the "swap" instruction, which works on all supported
 * SPARC CPUs.
 *
 * osl_InterlockedCountSetV9(int bv9)
 *    bv9 = 0   use sparcv8 "swap" (spinlock)
 *    bv9 = 1   use sparcv9/sparcv8plus "cas" (no spinlock)
 *
 * 32 bit mode without v8 support (implies v8plus) or 64 bit mode:
 * No need (nor the possibility) to call osl_InterlockedCountSetV9(),
 * sparcv9 mode is implied. Assemble with -xarch=v8plus (32 bit) or
 * -xarch=v9 (64 bit).
 *
 * Register usage common to all routines in this file:
 *    %o0       in:  argument (address of the counter, or bv9 for SetV9)
 *              out: new counter value (increment/decrement routines)
 *    %o1-%o3   scratch
 *    %o5, %g1  scratch, only used by the NETBSD/LINUX table-base sequence
 * All routines are leaf routines: they return with "retl" and do not
 * allocate a register window.
 */

#if !defined(__sparcv8plus) && !defined(__sparcv9) && !defined(__sparc_v9__)

/*
 * Dispatch pointers. Each holds the address of the implementation that the
 * public entry point jumps to. Both start out pointing at the V8 ("swap")
 * variant and are rewritten by osl_InterlockedCountSetV9.
 */
        .section ".data"
        .align  4
osl_incrementInterLockCountFuncPtr:
        .word   osl_incrementInterlockedCountV8
        .type   osl_incrementInterLockCountFuncPtr,#object
        .size   osl_incrementInterLockCountFuncPtr,4

        .align  4
osl_decrementInterLockCountFuncPtr:
        .word   osl_decrementInterlockedCountV8
        .type   osl_decrementInterLockCountFuncPtr,#object
        .size   osl_decrementInterLockCountFuncPtr,4

        .section ".text"

#if defined(NETBSD) || defined(LINUX)
/* add the address of the calling "call" instruction (stored in %o7) to
 * %o5 which contains _GLOBAL_OFFSET_TABLE_
 */
.Laddoseven:
        retl
        add     %o7, %o5, %o5           ! delay slot
#endif

/*
 * osl_incrementInterlockedCount
 *    in:  %o0 = address of the counter
 *    out: %o0 = value produced by the selected implementation
 * Loads the current implementation from osl_incrementInterLockCountFuncPtr
 * and jumps to it. %o0 is not modified here and %o7 is restored before the
 * jump, so the implementation returns directly to our caller.
 */
        .global osl_incrementInterlockedCount
        .align  4

osl_incrementInterlockedCount:

#if defined(NETBSD) || defined(LINUX)
        mov     %o7, %g1                ! keep return address, "call" overwrites %o7
        sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), %o5
        call    .Laddoseven
        add     %o5, %lo(_GLOBAL_OFFSET_TABLE_+4), %o5  ! delay slot
        mov     %g1, %o7                ! restore return address
#endif
        set     osl_incrementInterLockCountFuncPtr, %o1
        ! Guard made identical to osl_decrementInterlockedCount and
        ! osl_InterlockedCountSetV9: wherever %o5 is set up it must also be
        ! used to resolve the symbol (was NETBSD only).
#if defined(NETBSD) || defined(LINUX)
        ld      [%o1 + %o5], %o1        ! resolve the symbol relative to %o5
#endif
        ld      [%o1], %o1              ! %o1 = current implementation
        jmp     %o1
        nop                             ! delay slot
        .type   osl_incrementInterlockedCount,#function
        .size   osl_incrementInterlockedCount,.-osl_incrementInterlockedCount

/*
 * osl_decrementInterlockedCount
 *    in:  %o0 = address of the counter
 *    out: %o0 = value produced by the selected implementation
 * Same dispatch as osl_incrementInterlockedCount, but through
 * osl_decrementInterLockCountFuncPtr.
 */
        .section ".text"
        .global osl_decrementInterlockedCount
        .align  4

osl_decrementInterlockedCount:

#if defined(NETBSD) || defined(LINUX)
        mov     %o7, %g1                ! keep return address, "call" overwrites %o7
        sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), %o5
        call    .Laddoseven
        add     %o5, %lo(_GLOBAL_OFFSET_TABLE_+4), %o5  ! delay slot
        mov     %g1, %o7                ! restore return address
#endif
        set     osl_decrementInterLockCountFuncPtr, %o1
#if defined(NETBSD) || defined(LINUX)
        ld      [%o1 + %o5], %o1        ! resolve the symbol relative to %o5
#endif
        ld      [%o1], %o1              ! %o1 = current implementation
        jmp     %o1
        nop                             ! delay slot
        .type   osl_decrementInterlockedCount,#function
        .size   osl_decrementInterlockedCount,.-osl_decrementInterlockedCount

/*
 * osl_InterlockedCountSetV9(int bv9)
 *    in:  %o0 = bv9; zero selects the V8 ("swap") routines, any non-zero
 *               value selects the V9 ("cas") routines
 * Stores the chosen pair of implementations into the two dispatch
 * pointers. %o0 is reused as scratch once it has been tested; no return
 * value is set up.
 */
        .section ".text"
        .global osl_InterlockedCountSetV9
        .align  4

osl_InterlockedCountSetV9:

#if defined(NETBSD) || defined(LINUX)
        mov     %o7, %g1                ! keep return address, "call" overwrites %o7
        sethi   %hi(_GLOBAL_OFFSET_TABLE_-4), %o5
        call    .Laddoseven
        add     %o5, %lo(_GLOBAL_OFFSET_TABLE_+4), %o5  ! delay slot
        mov     %g1, %o7                ! restore return address
#endif
        set     osl_incrementInterLockCountFuncPtr, %o1
        set     osl_decrementInterLockCountFuncPtr, %o2
        cmp     %o0, %g0
        bnz     1f                      ! bv9 != 0: install the V9 routines
        nop                             ! delay slot

        ! bv9 == 0: install the V8 routines
        set     osl_incrementInterlockedCountV8, %o0
        set     osl_decrementInterlockedCountV8, %o3
#if defined(NETBSD) || defined(LINUX)
        ld      [%o0 + %o5], %o0
        ld      [%o1 + %o5], %o1
        ld      [%o2 + %o5], %o2
        ld      [%o3 + %o5], %o3
#endif
        st      %o3,[%o2]               ! decrement pointer
        retl
        st      %o0,[%o1]               ! delay slot: increment pointer

1:      set     osl_incrementInterlockedCountV9, %o0
        set     osl_decrementInterlockedCountV9, %o3
#if defined(NETBSD) || defined(LINUX)
        ld      [%o0 + %o5], %o0
        ld      [%o1 + %o5], %o1
        ld      [%o2 + %o5], %o2
        ld      [%o3 + %o5], %o3
#endif
        st      %o3,[%o2]               ! decrement pointer
        retl
        st      %o0,[%o1]               ! delay slot: increment pointer

        .type   osl_InterlockedCountSetV9,#function
        .size   osl_InterlockedCountSetV9,.-osl_InterlockedCountSetV9


        .section ".text"
        .local  osl_incrementInterlockedCountV8
        .align  4

!   Implements osl_[increment|decrement]InterlockedCount with sparcv8 "swap" instruction.
!   Uses -4096 as lock value for spinlock to allow for small negative counts.
!
!   in:  %o0 = address of the counter
!   out: %o0 = counter value after the increment
!   While one caller owns the counter the memory word holds -4096; the real
!   count lives in %o1 of the owner until the final "st" publishes it again.

osl_incrementInterlockedCountV8:

1:      ld      [%o0], %o1
        cmp     %o1, -4096              ! test spinlock
        be      1b                      ! locked by someone else: keep polling
        mov     -4096, %o1              ! delay slot
        swap    [%o0], %o1              ! try to take the lock, %o1 = previous word
        cmp     %o1, -4096
        be      1b                      ! somebody else got in first: start over
        inc     %o1                     ! delay slot, if we got spinlock, increment count
        st      %o1, [%o0]              ! publish new count, this releases the lock
        retl
        mov     %o1, %o0                ! delay slot

        .type   osl_incrementInterlockedCountV8,#function
        .size   osl_incrementInterlockedCountV8,.-osl_incrementInterlockedCountV8


        .section ".text"
        .local  osl_decrementInterlockedCountV8
        .align  4

!   Decrement counterpart of osl_incrementInterlockedCountV8, same protocol.
!
!   in:  %o0 = address of the counter
!   out: %o0 = counter value after the decrement

osl_decrementInterlockedCountV8:

1:      ld      [%o0], %o1
        cmp     %o1, -4096              ! test spinlock
        be      1b                      ! locked by someone else: keep polling
        mov     -4096, %o1              ! delay slot
        swap    [%o0], %o1              ! try to take the lock, %o1 = previous word
        cmp     %o1, -4096
        be      1b                      ! somebody else got in first: start over
        dec     %o1                     ! delay slot, if we got spinlock, decrement count
        st      %o1, [%o0]              ! publish new count, this releases the lock
        retl
        mov     %o1, %o0                ! delay slot

        .type   osl_decrementInterlockedCountV8,#function
        .size   osl_decrementInterlockedCountV8,.-osl_decrementInterlockedCountV8

#endif /* !__sparcv8plus && !__sparcv9 && !__sparc_v9__ */

        .section ".text"
#if defined(__sparcv8plus) || defined(__sparcv9) || defined(__sparc_v9__)
/* No V8 fallback in this configuration: the V9 routine is the public one. */
#define   osl_incrementInterlockedCountV9 osl_incrementInterlockedCount
        .global osl_incrementInterlockedCountV9
#else
        .local  osl_incrementInterlockedCountV9
#endif
        .align  8

!   Implements osl_[increment|decrement]InterlockedCount with sparcv9(sparcv8plus) "cas"
!   instruction.
!
!   in:  %o0 = address of the counter
!   out: %o0 = counter value after the increment
!   %o1 = value read from the counter, %o2 = proposed new value. After the
!   "cas" %o2 holds the word that was found in memory; the update took
!   place only if that equals %o1.

osl_incrementInterlockedCountV9:

1:      ld      [%o0], %o1
        add     %o1, 1, %o2
!       allow linux to build for v8
        .word 0xD5E21009
!       cas     [%o0], %o1, %o2
        cmp     %o1, %o2
        bne     1b                      ! counter changed in between: retry
        nop                             ! delay slot
        retl
        add     %o2, 1, %o0             ! delay slot

        .type   osl_incrementInterlockedCountV9,#function
        .size   osl_incrementInterlockedCountV9,.-osl_incrementInterlockedCountV9


        .section ".text"
#if defined(__sparcv8plus) || defined(__sparcv9) || defined(__sparc_v9__)
/* No V8 fallback in this configuration: the V9 routine is the public one. */
#define   osl_decrementInterlockedCountV9 osl_decrementInterlockedCount
        .global osl_decrementInterlockedCountV9
#else
        .local  osl_decrementInterlockedCountV9
#endif
        .align  8

!   Decrement counterpart of osl_incrementInterlockedCountV9.
!
!   in:  %o0 = address of the counter
!   out: %o0 = counter value after the decrement

osl_decrementInterlockedCountV9:

1:      ld      [%o0], %o1
        sub     %o1, 1, %o2
!       allow linux to build for v8
        .word 0xD5E21009
!       cas     [%o0], %o1, %o2
        cmp     %o1, %o2
        bne     1b                      ! counter changed in between: retry
        nop                             ! delay slot
        retl
        sub     %o2, 1, %o0             ! delay slot

        .type   osl_decrementInterlockedCountV9,#function
        .size   osl_decrementInterlockedCountV9,.-osl_decrementInterlockedCountV9