1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/xpcom/reflect/xptcall/src/md/unix/xptcinvoke_arm.cpp Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,411 @@ 1.4 +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public 1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this 1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 1.8 + 1.9 +/* Platform specific code to invoke XPCOM methods on native objects */ 1.10 + 1.11 +#include "xptcprivate.h" 1.12 + 1.13 +#include "mozilla/Compiler.h" 1.14 + 1.15 +#if !defined(__arm__) && !(defined(LINUX) || defined(ANDROID)) 1.16 +#error "This code is for Linux ARM only. Check that it works on your system, too.\nBeware that this code is highly compiler dependent." 1.17 +#endif 1.18 + 1.19 +#if MOZ_IS_GCC 1.20 +#if MOZ_GCC_VERSION_AT_LEAST(4, 5, 0) \ 1.21 + && defined(__ARM_EABI__) && !defined(__ARM_PCS_VFP) && !defined(__ARM_PCS) 1.22 +#error "Can't identify floating point calling conventions.\nPlease ensure that your toolchain defines __ARM_PCS or __ARM_PCS_VFP." 1.23 +#endif 1.24 +#endif 1.25 + 1.26 +#ifndef __ARM_PCS_VFP 1.27 + 1.28 +/* This function copies a 64-bits word from dw to the given pointer in 1.29 + * a buffer delimited by start and end, possibly wrapping around the 1.30 + * buffer boundaries, and/or properly aligning the data at 64-bits word 1.31 + * boundaries (for EABI). 1.32 + * start and end are both assumed to be 64-bits aligned. 1.33 + * Returns a pointer to the second 32-bits word copied (to accomodate 1.34 + * the invoke_copy_to_stack loop). 1.35 + */ 1.36 +static uint32_t * 1.37 +copy_double_word(uint32_t *start, uint32_t *current, uint32_t *end, uint64_t *dw) 1.38 +{ 1.39 +#ifdef __ARM_EABI__ 1.40 + /* Aligning the pointer for EABI */ 1.41 + current = (uint32_t *)(((uint32_t)current + 7) & ~7); 1.42 + /* Wrap when reaching the end of the buffer */ 1.43 + if (current == end) current = start; 1.44 +#else 1.45 + /* On non-EABI, 64-bits values are not aligned and when we reach the end 1.46 + * of the buffer, we need to write half of the data at the end, and the 1.47 + * other half at the beginning. */ 1.48 + if (current == end - 1) { 1.49 + *current = ((uint32_t*)dw)[0]; 1.50 + *start = ((uint32_t*)dw)[1]; 1.51 + return start; 1.52 + } 1.53 +#endif 1.54 + 1.55 + *((uint64_t*) current) = *dw; 1.56 + return current + 1; 1.57 +} 1.58 + 1.59 +/* See stack_space comment in NS_InvokeByIndex to see why this needs not to 1.60 + * be static on DEBUG builds. */ 1.61 +#ifndef DEBUG 1.62 +static 1.63 +#endif 1.64 +void 1.65 +invoke_copy_to_stack(uint32_t* stk, uint32_t *end, 1.66 + uint32_t paramCount, nsXPTCVariant* s) 1.67 +{ 1.68 + /* The stack buffer is 64-bits aligned. The end argument points to its end. 1.69 + * The caller is assumed to create a stack buffer of at least four 32-bits 1.70 + * words. 1.71 + * We use the last three 32-bit words to store the values for r1, r2 and r3 1.72 + * for the method call, i.e. the first words for arguments passing. 1.73 + */ 1.74 + uint32_t *d = end - 3; 1.75 + for(uint32_t i = 0; i < paramCount; i++, d++, s++) 1.76 + { 1.77 + /* Wrap when reaching the end of the stack buffer */ 1.78 + if (d == end) d = stk; 1.79 + NS_ASSERTION(d >= stk && d < end, 1.80 + "invoke_copy_to_stack is copying outside its given buffer"); 1.81 + if(s->IsPtrData()) 1.82 + { 1.83 + *((void**)d) = s->ptr; 1.84 + continue; 1.85 + } 1.86 + // According to the ARM EABI, integral types that are smaller than a word 1.87 + // are to be sign/zero-extended to a full word and treated as 4-byte values. 1.88 + 1.89 + switch(s->type) 1.90 + { 1.91 + case nsXPTType::T_I8 : *((int32_t*) d) = s->val.i8; break; 1.92 + case nsXPTType::T_I16 : *((int32_t*) d) = s->val.i16; break; 1.93 + case nsXPTType::T_I32 : *((int32_t*) d) = s->val.i32; break; 1.94 + case nsXPTType::T_I64 : 1.95 + d = copy_double_word(stk, d, end, (uint64_t *)&s->val.i64); 1.96 + break; 1.97 + case nsXPTType::T_U8 : *((uint32_t*)d) = s->val.u8; break; 1.98 + case nsXPTType::T_U16 : *((uint32_t*)d) = s->val.u16; break; 1.99 + case nsXPTType::T_U32 : *((uint32_t*)d) = s->val.u32; break; 1.100 + case nsXPTType::T_U64 : 1.101 + d = copy_double_word(stk, d, end, (uint64_t *)&s->val.u64); 1.102 + break; 1.103 + case nsXPTType::T_FLOAT : *((float*) d) = s->val.f; break; 1.104 + case nsXPTType::T_DOUBLE : 1.105 + d = copy_double_word(stk, d, end, (uint64_t *)&s->val.d); 1.106 + break; 1.107 + case nsXPTType::T_BOOL : *((int32_t*) d) = s->val.b; break; 1.108 + case nsXPTType::T_CHAR : *((int32_t*) d) = s->val.c; break; 1.109 + case nsXPTType::T_WCHAR : *((int32_t*) d) = s->val.wc; break; 1.110 + default: 1.111 + // all the others are plain pointer types 1.112 + *((void**)d) = s->val.p; 1.113 + break; 1.114 + } 1.115 + } 1.116 +} 1.117 + 1.118 +typedef nsresult (*vtable_func)(nsISupports *, uint32_t, uint32_t, uint32_t); 1.119 + 1.120 +EXPORT_XPCOM_API(nsresult) 1.121 +NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, 1.122 + uint32_t paramCount, nsXPTCVariant* params) 1.123 +{ 1.124 + 1.125 +/* This is to call a given method of class that. 1.126 + * The parameters are in params, the number is in paramCount. 1.127 + * The routine will issue calls to count the number of words 1.128 + * required for argument passing and to copy the arguments to 1.129 + * the stack. 1.130 + * ACPS passes the first 3 params in r1-r3 (with exceptions for 64-bits 1.131 + * arguments), and the remaining goes onto the stack. 1.132 + * We allocate a buffer on the stack for a "worst case" estimate of how much 1.133 + * stack might be needed for EABI, i.e. twice the number of parameters. 1.134 + * The end of this buffer will be used to store r1 to r3, so that the start 1.135 + * of the stack is the remaining parameters. 1.136 + * The magic here is to call the method with "that" and three 32-bits 1.137 + * arguments corresponding to r1-r3, so that the compiler generates the 1.138 + * proper function call. The stack will also contain the remaining arguments. 1.139 + * 1.140 + * !!! IMPORTANT !!! 1.141 + * This routine makes assumptions about the vtable layout of the c++ compiler. It's implemented 1.142 + * for arm-linux GNU g++ >= 2.8.1 (including egcs and gcc-2.95.[1-3])! 1.143 + * 1.144 + */ 1.145 + 1.146 + vtable_func *vtable, func; 1.147 + int base_size = (paramCount > 1) ? paramCount : 2; 1.148 + 1.149 +/* !!! IMPORTANT !!! 1.150 + * On DEBUG builds, the NS_ASSERTION used in invoke_copy_to_stack needs to use 1.151 + * the stack to pass the 5th argument to NS_DebugBreak. When invoke_copy_to_stack 1.152 + * is inlined, this can result, depending on the compiler and flags, in the 1.153 + * stack pointer not pointing at stack_space when the method is called at the 1.154 + * end of this function. More generally, any function call requiring stack 1.155 + * allocation of arguments is unsafe to be inlined in this function. 1.156 + */ 1.157 + uint32_t *stack_space = (uint32_t *) __builtin_alloca(base_size * 8); 1.158 + 1.159 + invoke_copy_to_stack(stack_space, &stack_space[base_size * 2], 1.160 + paramCount, params); 1.161 + 1.162 + vtable = *reinterpret_cast<vtable_func **>(that); 1.163 + func = vtable[methodIndex]; 1.164 + 1.165 + return func(that, stack_space[base_size * 2 - 3], 1.166 + stack_space[base_size * 2 - 2], 1.167 + stack_space[base_size * 2 - 1]); 1.168 +} 1.169 + 1.170 +#else /* __ARM_PCS_VFP */ 1.171 + 1.172 +/* "Procedure Call Standard for the ARM Architecture" document, sections 1.173 + * "5.5 Parameter Passing" and "6.1.2 Procedure Calling" contain all the 1.174 + * needed information. 1.175 + * 1.176 + * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042d/IHI0042D_aapcs.pdf 1.177 + */ 1.178 + 1.179 +#if defined(__thumb__) && !defined(__thumb2__) 1.180 +#error "Thumb1 is not supported" 1.181 +#endif 1.182 + 1.183 +#ifndef __ARMEL__ 1.184 +#error "Only little endian compatibility was tested" 1.185 +#endif 1.186 + 1.187 +/* 1.188 + * Allocation of integer function arguments initially to registers r1-r3 1.189 + * and then to stack. Handling of 'this' argument which goes to r0 registers 1.190 + * is handled separately and does not belong to these two inline functions. 1.191 + * 1.192 + * The doubleword arguments are allocated to even:odd 1.193 + * register pairs or get aligned at 8-byte boundary on stack. The "holes" 1.194 + * which may appear as a result of this realignment remain unused. 1.195 + * 1.196 + * 'ireg_args' - pointer to the current position in the buffer, 1.197 + * corresponding to the register arguments 1.198 + * 'stack_args' - pointer to the current position in the buffer, 1.199 + * corresponding to the arguments on stack 1.200 + * 'end' - pointer to the end of the registers argument 1.201 + * buffer (it is guaranteed to be 8-bytes aligned) 1.202 + */ 1.203 + 1.204 +static inline void copy_word(uint32_t* &ireg_args, 1.205 + uint32_t* &stack_args, 1.206 + uint32_t* end, 1.207 + uint32_t data) 1.208 +{ 1.209 + if (ireg_args < end) { 1.210 + *ireg_args = data; 1.211 + ireg_args++; 1.212 + } else { 1.213 + *stack_args = data; 1.214 + stack_args++; 1.215 + } 1.216 +} 1.217 + 1.218 +static inline void copy_dword(uint32_t* &ireg_args, 1.219 + uint32_t* &stack_args, 1.220 + uint32_t* end, 1.221 + uint64_t data) 1.222 +{ 1.223 + if (ireg_args + 1 < end) { 1.224 + if ((uint32_t)ireg_args & 4) { 1.225 + ireg_args++; 1.226 + } 1.227 + *(uint64_t *)ireg_args = data; 1.228 + ireg_args += 2; 1.229 + } else { 1.230 + if ((uint32_t)stack_args & 4) { 1.231 + stack_args++; 1.232 + } 1.233 + *(uint64_t *)stack_args = data; 1.234 + stack_args += 2; 1.235 + } 1.236 +} 1.237 + 1.238 +/* 1.239 + * Allocation of floating point arguments to VFP registers (s0-s15, d0-d7). 1.240 + * 1.241 + * Unlike integer registers allocation, "back-filling" needs to be 1.242 + * supported. For example, the third floating point argument in the 1.243 + * following function is going to be allocated to s1 register, back-filling 1.244 + * the "hole": 1.245 + * void f(float s0, double d1, float s1) 1.246 + * 1.247 + * Refer to the "Procedure Call Standard for the ARM Architecture" document 1.248 + * for more details. 1.249 + * 1.250 + * 'vfp_s_args' - pointer to the current position in the buffer with 1.251 + * the next unallocated single precision register 1.252 + * 'vfp_d_args' - pointer to the current position in the buffer with 1.253 + * the next unallocated double precision register, 1.254 + * it has the same value as 'vfp_s_args' when back-filling 1.255 + * is not used 1.256 + * 'end' - pointer to the end of the vfp registers argument 1.257 + * buffer (it is guaranteed to be 8-bytes aligned) 1.258 + * 1.259 + * Mozilla bugtracker has a test program attached which be used for 1.260 + * experimenting with VFP registers allocation code and testing its 1.261 + * correctness: 1.262 + * https://bugzilla.mozilla.org/show_bug.cgi?id=601914#c19 1.263 + */ 1.264 + 1.265 +static inline bool copy_vfp_single(float* &vfp_s_args, double* &vfp_d_args, 1.266 + float* end, float data) 1.267 +{ 1.268 + if (vfp_s_args >= end) 1.269 + return false; 1.270 + 1.271 + *vfp_s_args = data; 1.272 + vfp_s_args++; 1.273 + if (vfp_s_args < (float *)vfp_d_args) { 1.274 + // It was the case of back-filling, now the next free single precision 1.275 + // register should overlap with the next free double precision register 1.276 + vfp_s_args = (float *)vfp_d_args; 1.277 + } else if (vfp_s_args > (float *)vfp_d_args) { 1.278 + // also update the pointer to the next free double precision register 1.279 + vfp_d_args++; 1.280 + } 1.281 + return true; 1.282 +} 1.283 + 1.284 +static inline bool copy_vfp_double(float* &vfp_s_args, double* &vfp_d_args, 1.285 + float* end, double data) 1.286 +{ 1.287 + if (vfp_d_args >= (double *)end) { 1.288 + // The back-filling continues only so long as no VFP CPRC has been 1.289 + // allocated to a slot on the stack. Basically no VFP registers can 1.290 + // be allocated after this point. 1.291 + vfp_s_args = end; 1.292 + return false; 1.293 + } 1.294 + 1.295 + if (vfp_s_args == (float *)vfp_d_args) { 1.296 + // also update the pointer to the next free single precision register 1.297 + vfp_s_args += 2; 1.298 + } 1.299 + *vfp_d_args = data; 1.300 + vfp_d_args++; 1.301 + return true; 1.302 +} 1.303 + 1.304 +static void 1.305 +invoke_copy_to_stack(uint32_t* stk, uint32_t *end, 1.306 + uint32_t paramCount, nsXPTCVariant* s) 1.307 +{ 1.308 + uint32_t *ireg_args = end - 3; 1.309 + float *vfp_s_args = (float *)end; 1.310 + double *vfp_d_args = (double *)end; 1.311 + float *vfp_end = vfp_s_args + 16; 1.312 + 1.313 + for (uint32_t i = 0; i < paramCount; i++, s++) { 1.314 + if (s->IsPtrData()) { 1.315 + copy_word(ireg_args, stk, end, (uint32_t)s->ptr); 1.316 + continue; 1.317 + } 1.318 + // According to the ARM EABI, integral types that are smaller than a word 1.319 + // are to be sign/zero-extended to a full word and treated as 4-byte values 1.320 + switch (s->type) 1.321 + { 1.322 + case nsXPTType::T_FLOAT: 1.323 + if (!copy_vfp_single(vfp_s_args, vfp_d_args, vfp_end, s->val.f)) { 1.324 + copy_word(end, stk, end, reinterpret_cast<uint32_t&>(s->val.f)); 1.325 + } 1.326 + break; 1.327 + case nsXPTType::T_DOUBLE: 1.328 + if (!copy_vfp_double(vfp_s_args, vfp_d_args, vfp_end, s->val.d)) { 1.329 + copy_dword(end, stk, end, reinterpret_cast<uint64_t&>(s->val.d)); 1.330 + } 1.331 + break; 1.332 + case nsXPTType::T_I8: copy_word(ireg_args, stk, end, s->val.i8); break; 1.333 + case nsXPTType::T_I16: copy_word(ireg_args, stk, end, s->val.i16); break; 1.334 + case nsXPTType::T_I32: copy_word(ireg_args, stk, end, s->val.i32); break; 1.335 + case nsXPTType::T_I64: copy_dword(ireg_args, stk, end, s->val.i64); break; 1.336 + case nsXPTType::T_U8: copy_word(ireg_args, stk, end, s->val.u8); break; 1.337 + case nsXPTType::T_U16: copy_word(ireg_args, stk, end, s->val.u16); break; 1.338 + case nsXPTType::T_U32: copy_word(ireg_args, stk, end, s->val.u32); break; 1.339 + case nsXPTType::T_U64: copy_dword(ireg_args, stk, end, s->val.u64); break; 1.340 + case nsXPTType::T_BOOL: copy_word(ireg_args, stk, end, s->val.b); break; 1.341 + case nsXPTType::T_CHAR: copy_word(ireg_args, stk, end, s->val.c); break; 1.342 + case nsXPTType::T_WCHAR: copy_word(ireg_args, stk, end, s->val.wc); break; 1.343 + default: 1.344 + // all the others are plain pointer types 1.345 + copy_word(ireg_args, stk, end, reinterpret_cast<uint32_t>(s->val.p)); 1.346 + break; 1.347 + } 1.348 + } 1.349 +} 1.350 + 1.351 +typedef uint32_t (*vtable_func)(nsISupports *, uint32_t, uint32_t, uint32_t); 1.352 + 1.353 +EXPORT_XPCOM_API(nsresult) 1.354 +NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, 1.355 + uint32_t paramCount, nsXPTCVariant* params) 1.356 +{ 1.357 + vtable_func *vtable = *reinterpret_cast<vtable_func **>(that); 1.358 + vtable_func func = vtable[methodIndex]; 1.359 + // 'register uint32_t result asm("r0")' could be used here, but it does not 1.360 + // seem to be reliable in all cases: http://gcc.gnu.org/PR46164 1.361 + nsresult result; 1.362 + asm ( 1.363 + "mov r3, sp\n" 1.364 + "mov %[stack_space_size], %[param_count_plus_2], lsl #3\n" 1.365 + "tst r3, #4\n" /* check stack alignment */ 1.366 + 1.367 + "add %[stack_space_size], #(4 * 16)\n" /* space for VFP registers */ 1.368 + "mov r3, %[params]\n" 1.369 + 1.370 + "it ne\n" 1.371 + "addne %[stack_space_size], %[stack_space_size], #4\n" 1.372 + "sub r0, sp, %[stack_space_size]\n" /* allocate space on stack */ 1.373 + 1.374 + "sub r2, %[param_count_plus_2], #2\n" 1.375 + "mov sp, r0\n" 1.376 + 1.377 + "add r1, r0, %[param_count_plus_2], lsl #3\n" 1.378 + "blx %[invoke_copy_to_stack]\n" 1.379 + 1.380 + "add ip, sp, %[param_count_plus_2], lsl #3\n" 1.381 + "mov r0, %[that]\n" 1.382 + "ldmdb ip, {r1, r2, r3}\n" 1.383 + "vldm ip, {d0, d1, d2, d3, d4, d5, d6, d7}\n" 1.384 + "blx %[func]\n" 1.385 + 1.386 + "add sp, sp, %[stack_space_size]\n" /* cleanup stack */ 1.387 + "mov %[stack_space_size], r0\n" /* it's actually 'result' variable */ 1.388 + : [stack_space_size] "=&r" (result) 1.389 + : [func] "r" (func), 1.390 + [that] "r" (that), 1.391 + [params] "r" (params), 1.392 + [param_count_plus_2] "r" (paramCount + 2), 1.393 + [invoke_copy_to_stack] "r" (invoke_copy_to_stack) 1.394 + : "cc", "memory", 1.395 + // Mark all the scratch registers as clobbered because they may be 1.396 + // modified by the functions, called from this inline assembly block 1.397 + "r0", "r1", "r2", "r3", "ip", "lr", 1.398 + "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", 1.399 + // Also unconditionally mark d16-d31 registers as clobbered even though 1.400 + // they actually don't exist in vfpv2 and vfpv3-d16 variants. There is 1.401 + // no way to identify VFP variant using preprocessor at the momemnt 1.402 + // (see http://gcc.gnu.org/PR46128 for more details), but fortunately 1.403 + // current versions of gcc do not seem to complain about these registers 1.404 + // even when this code is compiled with '-mfpu=vfpv3-d16' option. 1.405 + // If gcc becomes more strict in the future and/or provides a way to 1.406 + // identify VFP variant, the following d16-d31 registers list needs 1.407 + // to be wrapped into some #ifdef 1.408 + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", 1.409 + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" 1.410 + ); 1.411 + return result; 1.412 +} 1.413 + 1.414 +#endif