michael@0: /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ michael@0: /* This Source Code Form is subject to the terms of the Mozilla Public michael@0: * License, v. 2.0. If a copy of the MPL was not distributed with this michael@0: * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ michael@0: michael@0: /* Platform specific code to invoke XPCOM methods on native objects */ michael@0: michael@0: #include "xptcprivate.h" michael@0: michael@0: #include "mozilla/Compiler.h" michael@0: michael@0: #if !defined(__arm__) && !(defined(LINUX) || defined(ANDROID)) michael@0: #error "This code is for Linux ARM only. Check that it works on your system, too.\nBeware that this code is highly compiler dependent." michael@0: #endif michael@0: michael@0: #if MOZ_IS_GCC michael@0: #if MOZ_GCC_VERSION_AT_LEAST(4, 5, 0) \ michael@0: && defined(__ARM_EABI__) && !defined(__ARM_PCS_VFP) && !defined(__ARM_PCS) michael@0: #error "Can't identify floating point calling conventions.\nPlease ensure that your toolchain defines __ARM_PCS or __ARM_PCS_VFP." michael@0: #endif michael@0: #endif michael@0: michael@0: #ifndef __ARM_PCS_VFP michael@0: michael@0: /* This function copies a 64-bits word from dw to the given pointer in michael@0: * a buffer delimited by start and end, possibly wrapping around the michael@0: * buffer boundaries, and/or properly aligning the data at 64-bits word michael@0: * boundaries (for EABI). michael@0: * start and end are both assumed to be 64-bits aligned. michael@0: * Returns a pointer to the second 32-bits word copied (to accomodate michael@0: * the invoke_copy_to_stack loop). michael@0: */ michael@0: static uint32_t * michael@0: copy_double_word(uint32_t *start, uint32_t *current, uint32_t *end, uint64_t *dw) michael@0: { michael@0: #ifdef __ARM_EABI__ michael@0: /* Aligning the pointer for EABI */ michael@0: current = (uint32_t *)(((uint32_t)current + 7) & ~7); michael@0: /* Wrap when reaching the end of the buffer */ michael@0: if (current == end) current = start; michael@0: #else michael@0: /* On non-EABI, 64-bits values are not aligned and when we reach the end michael@0: * of the buffer, we need to write half of the data at the end, and the michael@0: * other half at the beginning. */ michael@0: if (current == end - 1) { michael@0: *current = ((uint32_t*)dw)[0]; michael@0: *start = ((uint32_t*)dw)[1]; michael@0: return start; michael@0: } michael@0: #endif michael@0: michael@0: *((uint64_t*) current) = *dw; michael@0: return current + 1; michael@0: } michael@0: michael@0: /* See stack_space comment in NS_InvokeByIndex to see why this needs not to michael@0: * be static on DEBUG builds. */ michael@0: #ifndef DEBUG michael@0: static michael@0: #endif michael@0: void michael@0: invoke_copy_to_stack(uint32_t* stk, uint32_t *end, michael@0: uint32_t paramCount, nsXPTCVariant* s) michael@0: { michael@0: /* The stack buffer is 64-bits aligned. The end argument points to its end. michael@0: * The caller is assumed to create a stack buffer of at least four 32-bits michael@0: * words. michael@0: * We use the last three 32-bit words to store the values for r1, r2 and r3 michael@0: * for the method call, i.e. the first words for arguments passing. michael@0: */ michael@0: uint32_t *d = end - 3; michael@0: for(uint32_t i = 0; i < paramCount; i++, d++, s++) michael@0: { michael@0: /* Wrap when reaching the end of the stack buffer */ michael@0: if (d == end) d = stk; michael@0: NS_ASSERTION(d >= stk && d < end, michael@0: "invoke_copy_to_stack is copying outside its given buffer"); michael@0: if(s->IsPtrData()) michael@0: { michael@0: *((void**)d) = s->ptr; michael@0: continue; michael@0: } michael@0: // According to the ARM EABI, integral types that are smaller than a word michael@0: // are to be sign/zero-extended to a full word and treated as 4-byte values. michael@0: michael@0: switch(s->type) michael@0: { michael@0: case nsXPTType::T_I8 : *((int32_t*) d) = s->val.i8; break; michael@0: case nsXPTType::T_I16 : *((int32_t*) d) = s->val.i16; break; michael@0: case nsXPTType::T_I32 : *((int32_t*) d) = s->val.i32; break; michael@0: case nsXPTType::T_I64 : michael@0: d = copy_double_word(stk, d, end, (uint64_t *)&s->val.i64); michael@0: break; michael@0: case nsXPTType::T_U8 : *((uint32_t*)d) = s->val.u8; break; michael@0: case nsXPTType::T_U16 : *((uint32_t*)d) = s->val.u16; break; michael@0: case nsXPTType::T_U32 : *((uint32_t*)d) = s->val.u32; break; michael@0: case nsXPTType::T_U64 : michael@0: d = copy_double_word(stk, d, end, (uint64_t *)&s->val.u64); michael@0: break; michael@0: case nsXPTType::T_FLOAT : *((float*) d) = s->val.f; break; michael@0: case nsXPTType::T_DOUBLE : michael@0: d = copy_double_word(stk, d, end, (uint64_t *)&s->val.d); michael@0: break; michael@0: case nsXPTType::T_BOOL : *((int32_t*) d) = s->val.b; break; michael@0: case nsXPTType::T_CHAR : *((int32_t*) d) = s->val.c; break; michael@0: case nsXPTType::T_WCHAR : *((int32_t*) d) = s->val.wc; break; michael@0: default: michael@0: // all the others are plain pointer types michael@0: *((void**)d) = s->val.p; michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: typedef nsresult (*vtable_func)(nsISupports *, uint32_t, uint32_t, uint32_t); michael@0: michael@0: EXPORT_XPCOM_API(nsresult) michael@0: NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, michael@0: uint32_t paramCount, nsXPTCVariant* params) michael@0: { michael@0: michael@0: /* This is to call a given method of class that. michael@0: * The parameters are in params, the number is in paramCount. michael@0: * The routine will issue calls to count the number of words michael@0: * required for argument passing and to copy the arguments to michael@0: * the stack. michael@0: * ACPS passes the first 3 params in r1-r3 (with exceptions for 64-bits michael@0: * arguments), and the remaining goes onto the stack. michael@0: * We allocate a buffer on the stack for a "worst case" estimate of how much michael@0: * stack might be needed for EABI, i.e. twice the number of parameters. michael@0: * The end of this buffer will be used to store r1 to r3, so that the start michael@0: * of the stack is the remaining parameters. michael@0: * The magic here is to call the method with "that" and three 32-bits michael@0: * arguments corresponding to r1-r3, so that the compiler generates the michael@0: * proper function call. The stack will also contain the remaining arguments. michael@0: * michael@0: * !!! IMPORTANT !!! michael@0: * This routine makes assumptions about the vtable layout of the c++ compiler. It's implemented michael@0: * for arm-linux GNU g++ >= 2.8.1 (including egcs and gcc-2.95.[1-3])! michael@0: * michael@0: */ michael@0: michael@0: vtable_func *vtable, func; michael@0: int base_size = (paramCount > 1) ? paramCount : 2; michael@0: michael@0: /* !!! IMPORTANT !!! michael@0: * On DEBUG builds, the NS_ASSERTION used in invoke_copy_to_stack needs to use michael@0: * the stack to pass the 5th argument to NS_DebugBreak. When invoke_copy_to_stack michael@0: * is inlined, this can result, depending on the compiler and flags, in the michael@0: * stack pointer not pointing at stack_space when the method is called at the michael@0: * end of this function. More generally, any function call requiring stack michael@0: * allocation of arguments is unsafe to be inlined in this function. michael@0: */ michael@0: uint32_t *stack_space = (uint32_t *) __builtin_alloca(base_size * 8); michael@0: michael@0: invoke_copy_to_stack(stack_space, &stack_space[base_size * 2], michael@0: paramCount, params); michael@0: michael@0: vtable = *reinterpret_cast(that); michael@0: func = vtable[methodIndex]; michael@0: michael@0: return func(that, stack_space[base_size * 2 - 3], michael@0: stack_space[base_size * 2 - 2], michael@0: stack_space[base_size * 2 - 1]); michael@0: } michael@0: michael@0: #else /* __ARM_PCS_VFP */ michael@0: michael@0: /* "Procedure Call Standard for the ARM Architecture" document, sections michael@0: * "5.5 Parameter Passing" and "6.1.2 Procedure Calling" contain all the michael@0: * needed information. michael@0: * michael@0: * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042d/IHI0042D_aapcs.pdf michael@0: */ michael@0: michael@0: #if defined(__thumb__) && !defined(__thumb2__) michael@0: #error "Thumb1 is not supported" michael@0: #endif michael@0: michael@0: #ifndef __ARMEL__ michael@0: #error "Only little endian compatibility was tested" michael@0: #endif michael@0: michael@0: /* michael@0: * Allocation of integer function arguments initially to registers r1-r3 michael@0: * and then to stack. Handling of 'this' argument which goes to r0 registers michael@0: * is handled separately and does not belong to these two inline functions. michael@0: * michael@0: * The doubleword arguments are allocated to even:odd michael@0: * register pairs or get aligned at 8-byte boundary on stack. The "holes" michael@0: * which may appear as a result of this realignment remain unused. michael@0: * michael@0: * 'ireg_args' - pointer to the current position in the buffer, michael@0: * corresponding to the register arguments michael@0: * 'stack_args' - pointer to the current position in the buffer, michael@0: * corresponding to the arguments on stack michael@0: * 'end' - pointer to the end of the registers argument michael@0: * buffer (it is guaranteed to be 8-bytes aligned) michael@0: */ michael@0: michael@0: static inline void copy_word(uint32_t* &ireg_args, michael@0: uint32_t* &stack_args, michael@0: uint32_t* end, michael@0: uint32_t data) michael@0: { michael@0: if (ireg_args < end) { michael@0: *ireg_args = data; michael@0: ireg_args++; michael@0: } else { michael@0: *stack_args = data; michael@0: stack_args++; michael@0: } michael@0: } michael@0: michael@0: static inline void copy_dword(uint32_t* &ireg_args, michael@0: uint32_t* &stack_args, michael@0: uint32_t* end, michael@0: uint64_t data) michael@0: { michael@0: if (ireg_args + 1 < end) { michael@0: if ((uint32_t)ireg_args & 4) { michael@0: ireg_args++; michael@0: } michael@0: *(uint64_t *)ireg_args = data; michael@0: ireg_args += 2; michael@0: } else { michael@0: if ((uint32_t)stack_args & 4) { michael@0: stack_args++; michael@0: } michael@0: *(uint64_t *)stack_args = data; michael@0: stack_args += 2; michael@0: } michael@0: } michael@0: michael@0: /* michael@0: * Allocation of floating point arguments to VFP registers (s0-s15, d0-d7). michael@0: * michael@0: * Unlike integer registers allocation, "back-filling" needs to be michael@0: * supported. For example, the third floating point argument in the michael@0: * following function is going to be allocated to s1 register, back-filling michael@0: * the "hole": michael@0: * void f(float s0, double d1, float s1) michael@0: * michael@0: * Refer to the "Procedure Call Standard for the ARM Architecture" document michael@0: * for more details. michael@0: * michael@0: * 'vfp_s_args' - pointer to the current position in the buffer with michael@0: * the next unallocated single precision register michael@0: * 'vfp_d_args' - pointer to the current position in the buffer with michael@0: * the next unallocated double precision register, michael@0: * it has the same value as 'vfp_s_args' when back-filling michael@0: * is not used michael@0: * 'end' - pointer to the end of the vfp registers argument michael@0: * buffer (it is guaranteed to be 8-bytes aligned) michael@0: * michael@0: * Mozilla bugtracker has a test program attached which be used for michael@0: * experimenting with VFP registers allocation code and testing its michael@0: * correctness: michael@0: * https://bugzilla.mozilla.org/show_bug.cgi?id=601914#c19 michael@0: */ michael@0: michael@0: static inline bool copy_vfp_single(float* &vfp_s_args, double* &vfp_d_args, michael@0: float* end, float data) michael@0: { michael@0: if (vfp_s_args >= end) michael@0: return false; michael@0: michael@0: *vfp_s_args = data; michael@0: vfp_s_args++; michael@0: if (vfp_s_args < (float *)vfp_d_args) { michael@0: // It was the case of back-filling, now the next free single precision michael@0: // register should overlap with the next free double precision register michael@0: vfp_s_args = (float *)vfp_d_args; michael@0: } else if (vfp_s_args > (float *)vfp_d_args) { michael@0: // also update the pointer to the next free double precision register michael@0: vfp_d_args++; michael@0: } michael@0: return true; michael@0: } michael@0: michael@0: static inline bool copy_vfp_double(float* &vfp_s_args, double* &vfp_d_args, michael@0: float* end, double data) michael@0: { michael@0: if (vfp_d_args >= (double *)end) { michael@0: // The back-filling continues only so long as no VFP CPRC has been michael@0: // allocated to a slot on the stack. Basically no VFP registers can michael@0: // be allocated after this point. michael@0: vfp_s_args = end; michael@0: return false; michael@0: } michael@0: michael@0: if (vfp_s_args == (float *)vfp_d_args) { michael@0: // also update the pointer to the next free single precision register michael@0: vfp_s_args += 2; michael@0: } michael@0: *vfp_d_args = data; michael@0: vfp_d_args++; michael@0: return true; michael@0: } michael@0: michael@0: static void michael@0: invoke_copy_to_stack(uint32_t* stk, uint32_t *end, michael@0: uint32_t paramCount, nsXPTCVariant* s) michael@0: { michael@0: uint32_t *ireg_args = end - 3; michael@0: float *vfp_s_args = (float *)end; michael@0: double *vfp_d_args = (double *)end; michael@0: float *vfp_end = vfp_s_args + 16; michael@0: michael@0: for (uint32_t i = 0; i < paramCount; i++, s++) { michael@0: if (s->IsPtrData()) { michael@0: copy_word(ireg_args, stk, end, (uint32_t)s->ptr); michael@0: continue; michael@0: } michael@0: // According to the ARM EABI, integral types that are smaller than a word michael@0: // are to be sign/zero-extended to a full word and treated as 4-byte values michael@0: switch (s->type) michael@0: { michael@0: case nsXPTType::T_FLOAT: michael@0: if (!copy_vfp_single(vfp_s_args, vfp_d_args, vfp_end, s->val.f)) { michael@0: copy_word(end, stk, end, reinterpret_cast(s->val.f)); michael@0: } michael@0: break; michael@0: case nsXPTType::T_DOUBLE: michael@0: if (!copy_vfp_double(vfp_s_args, vfp_d_args, vfp_end, s->val.d)) { michael@0: copy_dword(end, stk, end, reinterpret_cast(s->val.d)); michael@0: } michael@0: break; michael@0: case nsXPTType::T_I8: copy_word(ireg_args, stk, end, s->val.i8); break; michael@0: case nsXPTType::T_I16: copy_word(ireg_args, stk, end, s->val.i16); break; michael@0: case nsXPTType::T_I32: copy_word(ireg_args, stk, end, s->val.i32); break; michael@0: case nsXPTType::T_I64: copy_dword(ireg_args, stk, end, s->val.i64); break; michael@0: case nsXPTType::T_U8: copy_word(ireg_args, stk, end, s->val.u8); break; michael@0: case nsXPTType::T_U16: copy_word(ireg_args, stk, end, s->val.u16); break; michael@0: case nsXPTType::T_U32: copy_word(ireg_args, stk, end, s->val.u32); break; michael@0: case nsXPTType::T_U64: copy_dword(ireg_args, stk, end, s->val.u64); break; michael@0: case nsXPTType::T_BOOL: copy_word(ireg_args, stk, end, s->val.b); break; michael@0: case nsXPTType::T_CHAR: copy_word(ireg_args, stk, end, s->val.c); break; michael@0: case nsXPTType::T_WCHAR: copy_word(ireg_args, stk, end, s->val.wc); break; michael@0: default: michael@0: // all the others are plain pointer types michael@0: copy_word(ireg_args, stk, end, reinterpret_cast(s->val.p)); michael@0: break; michael@0: } michael@0: } michael@0: } michael@0: michael@0: typedef uint32_t (*vtable_func)(nsISupports *, uint32_t, uint32_t, uint32_t); michael@0: michael@0: EXPORT_XPCOM_API(nsresult) michael@0: NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex, michael@0: uint32_t paramCount, nsXPTCVariant* params) michael@0: { michael@0: vtable_func *vtable = *reinterpret_cast(that); michael@0: vtable_func func = vtable[methodIndex]; michael@0: // 'register uint32_t result asm("r0")' could be used here, but it does not michael@0: // seem to be reliable in all cases: http://gcc.gnu.org/PR46164 michael@0: nsresult result; michael@0: asm ( michael@0: "mov r3, sp\n" michael@0: "mov %[stack_space_size], %[param_count_plus_2], lsl #3\n" michael@0: "tst r3, #4\n" /* check stack alignment */ michael@0: michael@0: "add %[stack_space_size], #(4 * 16)\n" /* space for VFP registers */ michael@0: "mov r3, %[params]\n" michael@0: michael@0: "it ne\n" michael@0: "addne %[stack_space_size], %[stack_space_size], #4\n" michael@0: "sub r0, sp, %[stack_space_size]\n" /* allocate space on stack */ michael@0: michael@0: "sub r2, %[param_count_plus_2], #2\n" michael@0: "mov sp, r0\n" michael@0: michael@0: "add r1, r0, %[param_count_plus_2], lsl #3\n" michael@0: "blx %[invoke_copy_to_stack]\n" michael@0: michael@0: "add ip, sp, %[param_count_plus_2], lsl #3\n" michael@0: "mov r0, %[that]\n" michael@0: "ldmdb ip, {r1, r2, r3}\n" michael@0: "vldm ip, {d0, d1, d2, d3, d4, d5, d6, d7}\n" michael@0: "blx %[func]\n" michael@0: michael@0: "add sp, sp, %[stack_space_size]\n" /* cleanup stack */ michael@0: "mov %[stack_space_size], r0\n" /* it's actually 'result' variable */ michael@0: : [stack_space_size] "=&r" (result) michael@0: : [func] "r" (func), michael@0: [that] "r" (that), michael@0: [params] "r" (params), michael@0: [param_count_plus_2] "r" (paramCount + 2), michael@0: [invoke_copy_to_stack] "r" (invoke_copy_to_stack) michael@0: : "cc", "memory", michael@0: // Mark all the scratch registers as clobbered because they may be michael@0: // modified by the functions, called from this inline assembly block michael@0: "r0", "r1", "r2", "r3", "ip", "lr", michael@0: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", michael@0: // Also unconditionally mark d16-d31 registers as clobbered even though michael@0: // they actually don't exist in vfpv2 and vfpv3-d16 variants. There is michael@0: // no way to identify VFP variant using preprocessor at the momemnt michael@0: // (see http://gcc.gnu.org/PR46128 for more details), but fortunately michael@0: // current versions of gcc do not seem to complain about these registers michael@0: // even when this code is compiled with '-mfpu=vfpv3-d16' option. michael@0: // If gcc becomes more strict in the future and/or provides a way to michael@0: // identify VFP variant, the following d16-d31 registers list needs michael@0: // to be wrapped into some #ifdef michael@0: "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", michael@0: "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31" michael@0: ); michael@0: return result; michael@0: } michael@0: michael@0: #endif