xpcom/reflect/xptcall/src/md/unix/xptcinvoke_arm.cpp

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
michael@0 2 /* This Source Code Form is subject to the terms of the Mozilla Public
michael@0 3 * License, v. 2.0. If a copy of the MPL was not distributed with this
michael@0 4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
michael@0 5
michael@0 6 /* Platform specific code to invoke XPCOM methods on native objects */
michael@0 7
michael@0 8 #include "xptcprivate.h"
michael@0 9
michael@0 10 #include "mozilla/Compiler.h"
michael@0 11
michael@0 12 #if !defined(__arm__) && !(defined(LINUX) || defined(ANDROID))
michael@0 13 #error "This code is for Linux ARM only. Check that it works on your system, too.\nBeware that this code is highly compiler dependent."
michael@0 14 #endif
michael@0 15
michael@0 16 #if MOZ_IS_GCC
michael@0 17 #if MOZ_GCC_VERSION_AT_LEAST(4, 5, 0) \
michael@0 18 && defined(__ARM_EABI__) && !defined(__ARM_PCS_VFP) && !defined(__ARM_PCS)
michael@0 19 #error "Can't identify floating point calling conventions.\nPlease ensure that your toolchain defines __ARM_PCS or __ARM_PCS_VFP."
michael@0 20 #endif
michael@0 21 #endif
michael@0 22
michael@0 23 #ifndef __ARM_PCS_VFP
michael@0 24
/* Stores the 64-bit value pointed to by dw into the circular argument buffer
 * [start, end), beginning at position current.
 *
 * With EABI the position is first rounded up to the next 8-byte boundary and
 * wraps to start when that rounding lands exactly on end.
 * Without EABI no alignment is applied; a value that begins in the last
 * 32-bit slot of the buffer is split, its first word going into that slot
 * and its second word to start.
 * start and end are both assumed to be 64-bit aligned.
 *
 * Returns a pointer to the second 32-bit word written, so that the d++ of
 * the invoke_copy_to_stack loop steps past the whole value.
 */
static uint32_t *
copy_double_word(uint32_t *start, uint32_t *current, uint32_t *end, uint64_t *dw)
{
#ifdef __ARM_EABI__
    /* Round the write position up to an 8-byte boundary, wrapping if needed. */
    uint32_t *slot =
        reinterpret_cast<uint32_t *>((reinterpret_cast<uint32_t>(current) + 7) & ~7);
    if (slot == end) {
        slot = start;
    }
#else
    uint32_t *slot = current;
    if (slot + 1 == end) {
        /* Only one word is left before the end: split the value. */
        uint32_t *halves = reinterpret_cast<uint32_t *>(dw);
        slot[0] = halves[0];
        start[0] = halves[1];
        return start;
    }
#endif

    *reinterpret_cast<uint64_t *>(slot) = *dw;
    return slot + 1;
}
michael@0 55
michael@0 56 /* See stack_space comment in NS_InvokeByIndex to see why this needs not to
michael@0 57 * be static on DEBUG builds. */
michael@0 58 #ifndef DEBUG
michael@0 59 static
michael@0 60 #endif
michael@0 61 void
michael@0 62 invoke_copy_to_stack(uint32_t* stk, uint32_t *end,
michael@0 63 uint32_t paramCount, nsXPTCVariant* s)
michael@0 64 {
michael@0 65 /* The stack buffer is 64-bits aligned. The end argument points to its end.
michael@0 66 * The caller is assumed to create a stack buffer of at least four 32-bits
michael@0 67 * words.
michael@0 68 * We use the last three 32-bit words to store the values for r1, r2 and r3
michael@0 69 * for the method call, i.e. the first words for arguments passing.
michael@0 70 */
michael@0 71 uint32_t *d = end - 3;
michael@0 72 for(uint32_t i = 0; i < paramCount; i++, d++, s++)
michael@0 73 {
michael@0 74 /* Wrap when reaching the end of the stack buffer */
michael@0 75 if (d == end) d = stk;
michael@0 76 NS_ASSERTION(d >= stk && d < end,
michael@0 77 "invoke_copy_to_stack is copying outside its given buffer");
michael@0 78 if(s->IsPtrData())
michael@0 79 {
michael@0 80 *((void**)d) = s->ptr;
michael@0 81 continue;
michael@0 82 }
michael@0 83 // According to the ARM EABI, integral types that are smaller than a word
michael@0 84 // are to be sign/zero-extended to a full word and treated as 4-byte values.
michael@0 85
michael@0 86 switch(s->type)
michael@0 87 {
michael@0 88 case nsXPTType::T_I8 : *((int32_t*) d) = s->val.i8; break;
michael@0 89 case nsXPTType::T_I16 : *((int32_t*) d) = s->val.i16; break;
michael@0 90 case nsXPTType::T_I32 : *((int32_t*) d) = s->val.i32; break;
michael@0 91 case nsXPTType::T_I64 :
michael@0 92 d = copy_double_word(stk, d, end, (uint64_t *)&s->val.i64);
michael@0 93 break;
michael@0 94 case nsXPTType::T_U8 : *((uint32_t*)d) = s->val.u8; break;
michael@0 95 case nsXPTType::T_U16 : *((uint32_t*)d) = s->val.u16; break;
michael@0 96 case nsXPTType::T_U32 : *((uint32_t*)d) = s->val.u32; break;
michael@0 97 case nsXPTType::T_U64 :
michael@0 98 d = copy_double_word(stk, d, end, (uint64_t *)&s->val.u64);
michael@0 99 break;
michael@0 100 case nsXPTType::T_FLOAT : *((float*) d) = s->val.f; break;
michael@0 101 case nsXPTType::T_DOUBLE :
michael@0 102 d = copy_double_word(stk, d, end, (uint64_t *)&s->val.d);
michael@0 103 break;
michael@0 104 case nsXPTType::T_BOOL : *((int32_t*) d) = s->val.b; break;
michael@0 105 case nsXPTType::T_CHAR : *((int32_t*) d) = s->val.c; break;
michael@0 106 case nsXPTType::T_WCHAR : *((int32_t*) d) = s->val.wc; break;
michael@0 107 default:
michael@0 108 // all the others are plain pointer types
michael@0 109 *((void**)d) = s->val.p;
michael@0 110 break;
michael@0 111 }
michael@0 112 }
michael@0 113 }
michael@0 114
/* Signature through which the target method is called: the object pointer
 * followed by three 32-bit words. */
michael@0 115 typedef nsresult (*vtable_func)(nsISupports *, uint32_t, uint32_t, uint32_t);
michael@0 116
michael@0 117 EXPORT_XPCOM_API(nsresult)
michael@0 118 NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
michael@0 119 uint32_t paramCount, nsXPTCVariant* params)
michael@0 120 {
michael@0 121
michael@0 122 /* This is to call a given method of class that.
michael@0 123 * The parameters are in params, the number is in paramCount.
michael@0 124 * The routine calls invoke_copy_to_stack to copy the arguments into a
michael@0 125 * buffer allocated on the stack, then calls entry methodIndex of the
michael@0 126 * vtable of that.
michael@0 127 * APCS passes the first 3 params in r1-r3 (with exceptions for 64-bits
michael@0 128 * arguments), and the remaining goes onto the stack.
michael@0 129 * We allocate a buffer on the stack for a "worst case" estimate of how much
michael@0 130 * stack might be needed for EABI, i.e. twice the number of parameters.
michael@0 131 * The end of this buffer will be used to store r1 to r3, so that the start
michael@0 132 * of the stack is the remaining parameters.
michael@0 133 * The magic here is to call the method with "that" and three 32-bits
michael@0 134 * arguments corresponding to r1-r3, so that the compiler generates the
michael@0 135 * proper function call. The stack will also contain the remaining arguments.
michael@0 136 *
michael@0 137 * !!! IMPORTANT !!!
michael@0 138 * This routine makes assumptions about the vtable layout of the c++ compiler. It's implemented
michael@0 139 * for arm-linux GNU g++ >= 2.8.1 (including egcs and gcc-2.95.[1-3])!
michael@0 140 *
michael@0 141 */
michael@0 142
michael@0 143 vtable_func *vtable, func;
/* base_size * 8 bytes are allocated below, i.e. base_size * 2 32-bit words.
 * The minimum of 2 yields the four words invoke_copy_to_stack expects. */
michael@0 144 int base_size = (paramCount > 1) ? paramCount : 2;
michael@0 145
michael@0 146 /* !!! IMPORTANT !!!
michael@0 147 * On DEBUG builds, the NS_ASSERTION used in invoke_copy_to_stack needs to use
michael@0 148 * the stack to pass the 5th argument to NS_DebugBreak. When invoke_copy_to_stack
michael@0 149 * is inlined, this can result, depending on the compiler and flags, in the
michael@0 150 * stack pointer not pointing at stack_space when the method is called at the
michael@0 151 * end of this function. More generally, any function call requiring stack
michael@0 152 * allocation of arguments is unsafe to be inlined in this function.
michael@0 153 */
michael@0 154 uint32_t *stack_space = (uint32_t *) __builtin_alloca(base_size * 8);
michael@0 155
/* Second argument is the one-past-the-end pointer of the buffer. */
michael@0 156 invoke_copy_to_stack(stack_space, &stack_space[base_size * 2],
michael@0 157 paramCount, params);
michael@0 158
/* The first pointer-sized word of *that is read as the vtable pointer. */
michael@0 159 vtable = *reinterpret_cast<vtable_func **>(that);
michael@0 160 func = vtable[methodIndex];
michael@0 161
/* Pass the last three words of the buffer as the 2nd to 4th call arguments. */
michael@0 162 return func(that, stack_space[base_size * 2 - 3],
michael@0 163 stack_space[base_size * 2 - 2],
michael@0 164 stack_space[base_size * 2 - 1]);
michael@0 165 }
michael@0 166
michael@0 167 #else /* __ARM_PCS_VFP */
michael@0 168
michael@0 169 /* "Procedure Call Standard for the ARM Architecture" document, sections
michael@0 170 * "5.5 Parameter Passing" and "6.1.2 Procedure Calling" contain all the
michael@0 171 * needed information.
michael@0 172 *
michael@0 173 * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042d/IHI0042D_aapcs.pdf
michael@0 174 */
michael@0 175
michael@0 176 #if defined(__thumb__) && !defined(__thumb2__)
michael@0 177 #error "Thumb1 is not supported"
michael@0 178 #endif
michael@0 179
michael@0 180 #ifndef __ARMEL__
michael@0 181 #error "Only little endian compatibility was tested"
michael@0 182 #endif
michael@0 183
michael@0 184 /*
michael@0 185 * Allocation of integer function arguments, first to registers r1-r3
michael@0 186 * and then to the stack. The 'this' argument, which goes to register r0,
michael@0 187 * is handled separately and does not belong to these two inline functions.
michael@0 188 *
michael@0 189 * The doubleword arguments are allocated to even:odd
michael@0 190 * register pairs or get aligned at 8-byte boundary on stack. The "holes"
michael@0 191 * which may appear as a result of this realignment remain unused.
michael@0 192 *
michael@0 193 * 'ireg_args' - pointer to the current position in the buffer,
michael@0 194 * corresponding to the register arguments
michael@0 195 * 'stack_args' - pointer to the current position in the buffer,
michael@0 196 * corresponding to the arguments on stack
michael@0 197 * 'end' - pointer to the end of the registers argument
michael@0 198 * buffer (it is guaranteed to be 8-bytes aligned)
michael@0 199 */
michael@0 200
/* Stores one 32-bit argument word and advances the cursor it was written
 * through: the register area while ireg_args is still below end, otherwise
 * the stack area.
 */
static inline void copy_word(uint32_t* &ireg_args,
                             uint32_t* &stack_args,
                             uint32_t* end,
                             uint32_t data)
{
    uint32_t* &cursor = (ireg_args < end) ? ireg_args : stack_args;
    *cursor++ = data;
}
michael@0 214
/* Stores one 64-bit argument.
 *
 * While at least two register slots remain below end, the value goes to the
 * register area; a cursor sitting on an odd word (address bit 2 set) first
 * skips one slot so that the value occupies an 8-byte aligned pair.
 * Otherwise the value goes to the stack area, again 8-byte aligned.
 *
 * When the value goes to the stack, ireg_args is moved to end. The AAPCS
 * parameter-passing rules do not allow a core register to be used for any
 * later argument once an argument has been placed on the stack, so a
 * register slot left over in front of the value (r3) must stay unused
 * rather than be picked up by a following 32-bit argument.
 */
static inline void copy_dword(uint32_t* &ireg_args,
                              uint32_t* &stack_args,
                              uint32_t* end,
                              uint64_t data)
{
    if (ireg_args + 1 < end) {
        // uintptr_t keeps the alignment test valid for any pointer width;
        // on 32-bit ARM it is the same as the former uint32_t cast.
        if (reinterpret_cast<uintptr_t>(ireg_args) & 4) {
            ireg_args++;
        }
        *(uint64_t *)ireg_args = data;
        ireg_args += 2;
    } else {
        // No further argument may be allocated to a core register.
        ireg_args = end;
        if (reinterpret_cast<uintptr_t>(stack_args) & 4) {
            stack_args++;
        }
        *(uint64_t *)stack_args = data;
        stack_args += 2;
    }
}
michael@0 234
michael@0 235 /*
michael@0 236 * Allocation of floating point arguments to VFP registers (s0-s15, d0-d7).
michael@0 237 *
michael@0 238 * Unlike integer registers allocation, "back-filling" needs to be
michael@0 239 * supported. For example, the third floating point argument in the
michael@0 240 * following function is going to be allocated to s1 register, back-filling
michael@0 241 * the "hole":
michael@0 242 * void f(float s0, double d1, float s1)
michael@0 243 *
michael@0 244 * Refer to the "Procedure Call Standard for the ARM Architecture" document
michael@0 245 * for more details.
michael@0 246 *
michael@0 247 * 'vfp_s_args' - pointer to the current position in the buffer with
michael@0 248 * the next unallocated single precision register
michael@0 249 * 'vfp_d_args' - pointer to the current position in the buffer with
michael@0 250 * the next unallocated double precision register,
michael@0 251 * it has the same value as 'vfp_s_args' when back-filling
michael@0 252 * is not used
michael@0 253 * 'end' - pointer to the end of the vfp registers argument
michael@0 254 * buffer (it is guaranteed to be 8-bytes aligned)
michael@0 255 *
michael@0 256 * The Mozilla bug tracker has a test program attached which can be used for
michael@0 257 * experimenting with VFP registers allocation code and testing its
michael@0 258 * correctness:
michael@0 259 * https://bugzilla.mozilla.org/show_bug.cgi?id=601914#c19
michael@0 260 */
michael@0 261
/* Tries to place a single precision argument in the VFP register buffer.
 *
 * Returns false, leaving both cursors untouched, when vfp_s_args has reached
 * end. Otherwise stores data at vfp_s_args, advances it and returns true.
 * After the store the two cursors are reconciled:
 *  - if the single cursor moved past the double cursor, the double cursor
 *    advances by one register;
 *  - if the single cursor is still behind it (a back-fill just happened),
 *    the single cursor jumps forward to the double cursor.
 */
static inline bool copy_vfp_single(float* &vfp_s_args, double* &vfp_d_args,
                                   float* end, float data)
{
    if (vfp_s_args >= end) {
        return false;
    }

    *vfp_s_args++ = data;

    float *next_double = reinterpret_cast<float *>(vfp_d_args);
    if (vfp_s_args > next_double) {
        // The single just written used part of the next free double register.
        ++vfp_d_args;
    } else if (vfp_s_args < next_double) {
        // A hole was back-filled; continue at the next free double register.
        vfp_s_args = next_double;
    }
    return true;
}
michael@0 280
/* Tries to place a double precision argument in the VFP register buffer.
 *
 * Returns false when vfp_d_args has reached end; in that case vfp_s_args is
 * also moved to end, because back-filling continues only so long as no VFP
 * argument has been allocated to a slot on the stack.
 * Otherwise stores data at vfp_d_args, advances it by one register and
 * returns true. If both cursors pointed at the same register beforehand, the
 * single cursor is moved past the register just used as well.
 */
static inline bool copy_vfp_double(float* &vfp_s_args, double* &vfp_d_args,
                                   float* end, double data)
{
    const bool have_register = vfp_d_args < reinterpret_cast<double *>(end);
    if (!have_register) {
        // No VFP register may be allocated after this point.
        vfp_s_args = end;
        return false;
    }

    const bool cursors_coincide =
        (vfp_s_args == reinterpret_cast<float *>(vfp_d_args));

    *vfp_d_args++ = data;
    if (cursors_coincide) {
        vfp_s_args += 2;
    }
    return true;
}
michael@0 300
michael@0 301 static void
michael@0 302 invoke_copy_to_stack(uint32_t* stk, uint32_t *end,
michael@0 303 uint32_t paramCount, nsXPTCVariant* s)
michael@0 304 {
michael@0 305 uint32_t *ireg_args = end - 3;
michael@0 306 float *vfp_s_args = (float *)end;
michael@0 307 double *vfp_d_args = (double *)end;
michael@0 308 float *vfp_end = vfp_s_args + 16;
michael@0 309
michael@0 310 for (uint32_t i = 0; i < paramCount; i++, s++) {
michael@0 311 if (s->IsPtrData()) {
michael@0 312 copy_word(ireg_args, stk, end, (uint32_t)s->ptr);
michael@0 313 continue;
michael@0 314 }
michael@0 315 // According to the ARM EABI, integral types that are smaller than a word
michael@0 316 // are to be sign/zero-extended to a full word and treated as 4-byte values
michael@0 317 switch (s->type)
michael@0 318 {
michael@0 319 case nsXPTType::T_FLOAT:
michael@0 320 if (!copy_vfp_single(vfp_s_args, vfp_d_args, vfp_end, s->val.f)) {
michael@0 321 copy_word(end, stk, end, reinterpret_cast<uint32_t&>(s->val.f));
michael@0 322 }
michael@0 323 break;
michael@0 324 case nsXPTType::T_DOUBLE:
michael@0 325 if (!copy_vfp_double(vfp_s_args, vfp_d_args, vfp_end, s->val.d)) {
michael@0 326 copy_dword(end, stk, end, reinterpret_cast<uint64_t&>(s->val.d));
michael@0 327 }
michael@0 328 break;
michael@0 329 case nsXPTType::T_I8: copy_word(ireg_args, stk, end, s->val.i8); break;
michael@0 330 case nsXPTType::T_I16: copy_word(ireg_args, stk, end, s->val.i16); break;
michael@0 331 case nsXPTType::T_I32: copy_word(ireg_args, stk, end, s->val.i32); break;
michael@0 332 case nsXPTType::T_I64: copy_dword(ireg_args, stk, end, s->val.i64); break;
michael@0 333 case nsXPTType::T_U8: copy_word(ireg_args, stk, end, s->val.u8); break;
michael@0 334 case nsXPTType::T_U16: copy_word(ireg_args, stk, end, s->val.u16); break;
michael@0 335 case nsXPTType::T_U32: copy_word(ireg_args, stk, end, s->val.u32); break;
michael@0 336 case nsXPTType::T_U64: copy_dword(ireg_args, stk, end, s->val.u64); break;
michael@0 337 case nsXPTType::T_BOOL: copy_word(ireg_args, stk, end, s->val.b); break;
michael@0 338 case nsXPTType::T_CHAR: copy_word(ireg_args, stk, end, s->val.c); break;
michael@0 339 case nsXPTType::T_WCHAR: copy_word(ireg_args, stk, end, s->val.wc); break;
michael@0 340 default:
michael@0 341 // all the others are plain pointer types
michael@0 342 copy_word(ireg_args, stk, end, reinterpret_cast<uint32_t>(s->val.p));
michael@0 343 break;
michael@0 344 }
michael@0 345 }
michael@0 346 }
michael@0 347
/* Type through which vtable entries are read. The function pointer itself is
 * only called from the inline assembly below. */
michael@0 348 typedef uint32_t (*vtable_func)(nsISupports *, uint32_t, uint32_t, uint32_t);
michael@0 349
/* Calls entry methodIndex of the vtable of 'that', passing the paramCount
 * arguments described by params.
 *
 * The assembly block below:
 *  1. reserves (paramCount + 2) * 8 + 64 bytes on the stack, plus 4 more
 *     when bit 2 of sp is set;
 *  2. calls invoke_copy_to_stack(stk, end, paramCount, params) with stk
 *     being the new sp and end being stk + (paramCount + 2) * 8;
 *  3. loads r1-r3 from the three words just below end and d0-d7 from the
 *     64 bytes starting at end, puts 'that' in r0 and calls the method;
 *  4. restores sp and hands the value left in r0 back as the result.
 */
michael@0 350 EXPORT_XPCOM_API(nsresult)
michael@0 351 NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
michael@0 352 uint32_t paramCount, nsXPTCVariant* params)
michael@0 353 {
/* The first pointer-sized word of *that is read as the vtable pointer. */
michael@0 354 vtable_func *vtable = *reinterpret_cast<vtable_func **>(that);
michael@0 355 vtable_func func = vtable[methodIndex];
michael@0 356 // 'register uint32_t result asm("r0")' could be used here, but it does not
michael@0 357 // seem to be reliable in all cases: http://gcc.gnu.org/PR46164
michael@0 358 nsresult result;
michael@0 359 asm (
michael@0 360 "mov r3, sp\n" /* r3 = sp, examined by the tst below */
michael@0 361 "mov %[stack_space_size], %[param_count_plus_2], lsl #3\n" /* (paramCount + 2) * 8 bytes */
michael@0 362 "tst r3, #4\n" /* check stack alignment */
michael@0 363
michael@0 364 "add %[stack_space_size], #(4 * 16)\n" /* space for VFP registers */
michael@0 365 "mov r3, %[params]\n" /* r3 = 4th argument of invoke_copy_to_stack */
michael@0 366
michael@0 367 "it ne\n"
michael@0 368 "addne %[stack_space_size], %[stack_space_size], #4\n" /* 4 extra bytes when bit 2 of sp was set */
michael@0 369 "sub r0, sp, %[stack_space_size]\n" /* allocate space on stack */
michael@0 370
michael@0 371 "sub r2, %[param_count_plus_2], #2\n" /* r2 = paramCount, 3rd argument */
michael@0 372 "mov sp, r0\n" /* r0 = new sp = stk, 1st argument */
michael@0 373
michael@0 374 "add r1, r0, %[param_count_plus_2], lsl #3\n" /* r1 = end, 2nd argument */
michael@0 375 "blx %[invoke_copy_to_stack]\n"
michael@0 376
michael@0 377 "add ip, sp, %[param_count_plus_2], lsl #3\n" /* ip = end, recomputed after the call */
michael@0 378 "mov r0, %[that]\n"
michael@0 379 "ldmdb ip, {r1, r2, r3}\n" /* the three words just below end */
michael@0 380 "vldm ip, {d0, d1, d2, d3, d4, d5, d6, d7}\n" /* the 64 bytes starting at end */
michael@0 381 "blx %[func]\n"
michael@0 382
michael@0 383 "add sp, sp, %[stack_space_size]\n" /* cleanup stack */
michael@0 384 "mov %[stack_space_size], r0\n" /* it's actually 'result' variable */
/* The single output operand doubles as the scratch register that holds the
 * allocation size until the very last instruction. */
michael@0 385 : [stack_space_size] "=&r" (result)
michael@0 386 : [func] "r" (func),
michael@0 387 [that] "r" (that),
michael@0 388 [params] "r" (params),
michael@0 389 [param_count_plus_2] "r" (paramCount + 2),
michael@0 390 [invoke_copy_to_stack] "r" (invoke_copy_to_stack)
michael@0 391 : "cc", "memory",
michael@0 392 // Mark all the scratch registers as clobbered because they may be
michael@0 393 // modified by the functions, called from this inline assembly block
michael@0 394 "r0", "r1", "r2", "r3", "ip", "lr",
michael@0 395 "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
michael@0 396 // Also unconditionally mark d16-d31 registers as clobbered even though
michael@0 397 // they actually don't exist in vfpv2 and vfpv3-d16 variants. There is
michael@0 398 // no way to identify VFP variant using preprocessor at the moment
michael@0 399 // (see http://gcc.gnu.org/PR46128 for more details), but fortunately
michael@0 400 // current versions of gcc do not seem to complain about these registers
michael@0 401 // even when this code is compiled with '-mfpu=vfpv3-d16' option.
michael@0 402 // If gcc becomes more strict in the future and/or provides a way to
michael@0 403 // identify VFP variant, the following d16-d31 registers list needs
michael@0 404 // to be wrapped into some #ifdef
michael@0 405 "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
michael@0 406 "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"
michael@0 407 );
michael@0 408 return result;
michael@0 409 }
michael@0 410
michael@0 411 #endif

mercurial