xpcom/reflect/xptcall/src/md/unix/xptcinvoke_arm.cpp

changeset 0
6474c204b198
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/xpcom/reflect/xptcall/src/md/unix/xptcinvoke_arm.cpp	Wed Dec 31 06:09:35 2014 +0100
     1.3 @@ -0,0 +1,411 @@
     1.4 +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
     1.5 +/* This Source Code Form is subject to the terms of the Mozilla Public
     1.6 + * License, v. 2.0. If a copy of the MPL was not distributed with this
     1.7 + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     1.8 +
     1.9 +/* Platform specific code to invoke XPCOM methods on native objects */
    1.10 +
    1.11 +#include "xptcprivate.h"
    1.12 +
    1.13 +#include "mozilla/Compiler.h"
    1.14 +
    1.15 +#if !defined(__arm__) && !(defined(LINUX) || defined(ANDROID))
    1.16 +#error "This code is for Linux ARM only. Check that it works on your system, too.\nBeware that this code is highly compiler dependent."
    1.17 +#endif
    1.18 +
    1.19 +#if MOZ_IS_GCC
    1.20 +#if MOZ_GCC_VERSION_AT_LEAST(4, 5, 0) \
    1.21 +    && defined(__ARM_EABI__) && !defined(__ARM_PCS_VFP) && !defined(__ARM_PCS)
    1.22 +#error "Can't identify floating point calling conventions.\nPlease ensure that your toolchain defines __ARM_PCS or __ARM_PCS_VFP."
    1.23 +#endif
    1.24 +#endif
    1.25 +
    1.26 +#ifndef __ARM_PCS_VFP
    1.27 +
    1.28 +/* This function copies a 64-bits word from dw to the given pointer in
    1.29 + * a buffer delimited by start and end, possibly wrapping around the
    1.30 + * buffer boundaries, and/or properly aligning the data at 64-bits word
    1.31 + * boundaries (for EABI).
    1.32 + * start and end are both assumed to be 64-bits aligned.
    1.33 + * Returns a pointer to the second 32-bits word copied (to accomodate
    1.34 + * the invoke_copy_to_stack loop).
    1.35 + */
    1.36 +static uint32_t *
    1.37 +copy_double_word(uint32_t *start, uint32_t *current, uint32_t *end, uint64_t *dw)
    1.38 +{
    1.39 +#ifdef __ARM_EABI__
    1.40 +    /* Aligning the pointer for EABI */
    1.41 +    current = (uint32_t *)(((uint32_t)current + 7) & ~7);
    1.42 +    /* Wrap when reaching the end of the buffer */
    1.43 +    if (current == end) current = start;
    1.44 +#else
    1.45 +    /* On non-EABI, 64-bits values are not aligned and when we reach the end
    1.46 +     * of the buffer, we need to write half of the data at the end, and the
    1.47 +     * other half at the beginning. */
    1.48 +    if (current == end - 1) {
    1.49 +        *current = ((uint32_t*)dw)[0];
    1.50 +        *start = ((uint32_t*)dw)[1];
    1.51 +        return start;
    1.52 +    }
    1.53 +#endif
    1.54 +
    1.55 +    *((uint64_t*) current) = *dw;
    1.56 +    return current + 1;
    1.57 +}
    1.58 +
    1.59 +/* See stack_space comment in NS_InvokeByIndex to see why this needs not to
    1.60 + * be static on DEBUG builds. */
    1.61 +#ifndef DEBUG
    1.62 +static
    1.63 +#endif
    1.64 +void
    1.65 +invoke_copy_to_stack(uint32_t* stk, uint32_t *end,
    1.66 +                     uint32_t paramCount, nsXPTCVariant* s)
    1.67 +{
    1.68 +    /* The stack buffer is 64-bits aligned. The end argument points to its end.
    1.69 +     * The caller is assumed to create a stack buffer of at least four 32-bits
    1.70 +     * words.
    1.71 +     * We use the last three 32-bit words to store the values for r1, r2 and r3
    1.72 +     * for the method call, i.e. the first words for arguments passing.
    1.73 +     */
    1.74 +    uint32_t *d = end - 3;
    1.75 +    for(uint32_t i = 0; i < paramCount; i++, d++, s++)
    1.76 +    {
    1.77 +        /* Wrap when reaching the end of the stack buffer */
    1.78 +        if (d == end) d = stk;
    1.79 +        NS_ASSERTION(d >= stk && d < end,
    1.80 +            "invoke_copy_to_stack is copying outside its given buffer");
    1.81 +        if(s->IsPtrData())
    1.82 +        {
    1.83 +            *((void**)d) = s->ptr;
    1.84 +            continue;
    1.85 +        }
    1.86 +        // According to the ARM EABI, integral types that are smaller than a word
    1.87 +        // are to be sign/zero-extended to a full word and treated as 4-byte values.
    1.88 +
    1.89 +        switch(s->type)
    1.90 +        {
    1.91 +        case nsXPTType::T_I8     : *((int32_t*) d) = s->val.i8;          break;
    1.92 +        case nsXPTType::T_I16    : *((int32_t*) d) = s->val.i16;         break;
    1.93 +        case nsXPTType::T_I32    : *((int32_t*) d) = s->val.i32;         break;
    1.94 +        case nsXPTType::T_I64    :
    1.95 +            d = copy_double_word(stk, d, end, (uint64_t *)&s->val.i64);
    1.96 +            break;
    1.97 +        case nsXPTType::T_U8     : *((uint32_t*)d) = s->val.u8;          break;
    1.98 +        case nsXPTType::T_U16    : *((uint32_t*)d) = s->val.u16;         break;
    1.99 +        case nsXPTType::T_U32    : *((uint32_t*)d) = s->val.u32;         break;
   1.100 +        case nsXPTType::T_U64    :
   1.101 +            d = copy_double_word(stk, d, end, (uint64_t *)&s->val.u64);
   1.102 +            break;
   1.103 +        case nsXPTType::T_FLOAT  : *((float*)   d) = s->val.f;           break;
   1.104 +        case nsXPTType::T_DOUBLE :
   1.105 +            d = copy_double_word(stk, d, end, (uint64_t *)&s->val.d);
   1.106 +            break;
   1.107 +        case nsXPTType::T_BOOL   : *((int32_t*) d) = s->val.b;           break;
   1.108 +        case nsXPTType::T_CHAR   : *((int32_t*) d) = s->val.c;           break;
   1.109 +        case nsXPTType::T_WCHAR  : *((int32_t*) d) = s->val.wc;          break;
   1.110 +        default:
   1.111 +            // all the others are plain pointer types
   1.112 +            *((void**)d) = s->val.p;
   1.113 +            break;
   1.114 +        }
   1.115 +    }
   1.116 +}
   1.117 +
   1.118 +typedef nsresult (*vtable_func)(nsISupports *, uint32_t, uint32_t, uint32_t);
   1.119 +
   1.120 +EXPORT_XPCOM_API(nsresult)
   1.121 +NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
   1.122 +                   uint32_t paramCount, nsXPTCVariant* params)
   1.123 +{
   1.124 +
   1.125 +/* This is to call a given method of class that.
   1.126 + * The parameters are in params, the number is in paramCount.
   1.127 + * The routine will issue calls to count the number of words
   1.128 + * required for argument passing and to copy the arguments to
   1.129 + * the stack.
   1.130 + * ACPS passes the first 3 params in r1-r3 (with exceptions for 64-bits
   1.131 + * arguments), and the remaining goes onto the stack.
   1.132 + * We allocate a buffer on the stack for a "worst case" estimate of how much
   1.133 + * stack might be needed for EABI, i.e. twice the number of parameters.
   1.134 + * The end of this buffer will be used to store r1 to r3, so that the start
   1.135 + * of the stack is the remaining parameters.
   1.136 + * The magic here is to call the method with "that" and three 32-bits
   1.137 + * arguments corresponding to r1-r3, so that the compiler generates the
   1.138 + * proper function call. The stack will also contain the remaining arguments.
   1.139 + *
   1.140 + * !!! IMPORTANT !!!
   1.141 + * This routine makes assumptions about the vtable layout of the c++ compiler. It's implemented
   1.142 + * for arm-linux GNU g++ >= 2.8.1 (including egcs and gcc-2.95.[1-3])!
   1.143 + *
   1.144 + */
   1.145 + 
   1.146 +  vtable_func *vtable, func;
   1.147 +  int base_size = (paramCount > 1) ? paramCount : 2;
   1.148 +
   1.149 +/* !!! IMPORTANT !!!
   1.150 + * On DEBUG builds, the NS_ASSERTION used in invoke_copy_to_stack needs to use
   1.151 + * the stack to pass the 5th argument to NS_DebugBreak. When invoke_copy_to_stack
   1.152 + * is inlined, this can result, depending on the compiler and flags, in the
   1.153 + * stack pointer not pointing at stack_space when the method is called at the
   1.154 + * end of this function. More generally, any function call requiring stack
   1.155 + * allocation of arguments is unsafe to be inlined in this function.
   1.156 + */
   1.157 +  uint32_t *stack_space = (uint32_t *) __builtin_alloca(base_size * 8);
   1.158 +
   1.159 +  invoke_copy_to_stack(stack_space, &stack_space[base_size * 2],
   1.160 +                       paramCount, params);
   1.161 +
   1.162 +  vtable = *reinterpret_cast<vtable_func **>(that);
   1.163 +  func = vtable[methodIndex];
   1.164 +
   1.165 +  return func(that, stack_space[base_size * 2 - 3],
   1.166 +                    stack_space[base_size * 2 - 2],
   1.167 +                    stack_space[base_size * 2 - 1]);
   1.168 +}    
   1.169 +
   1.170 +#else /* __ARM_PCS_VFP */
   1.171 +
   1.172 +/* "Procedure Call Standard for the ARM Architecture" document, sections
   1.173 + * "5.5 Parameter Passing" and "6.1.2 Procedure Calling" contain all the
   1.174 + * needed information.
   1.175 + *
   1.176 + * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042d/IHI0042D_aapcs.pdf
   1.177 + */
   1.178 +
   1.179 +#if defined(__thumb__) && !defined(__thumb2__)
   1.180 +#error "Thumb1 is not supported"
   1.181 +#endif
   1.182 +
   1.183 +#ifndef __ARMEL__
   1.184 +#error "Only little endian compatibility was tested"
   1.185 +#endif
   1.186 +
   1.187 +/*
   1.188 + * Allocation of integer function arguments initially to registers r1-r3
   1.189 + * and then to stack. Handling of 'this' argument which goes to r0 registers
   1.190 + * is handled separately and does not belong to these two inline functions.
   1.191 + *
   1.192 + * The doubleword arguments are allocated to even:odd
   1.193 + * register pairs or get aligned at 8-byte boundary on stack. The "holes"
   1.194 + * which may appear as a result of this realignment remain unused.
   1.195 + *
   1.196 + * 'ireg_args'  - pointer to the current position in the buffer,
   1.197 + *                corresponding to the register arguments
   1.198 + * 'stack_args' - pointer to the current position in the buffer,
   1.199 + *                corresponding to the arguments on stack
   1.200 + * 'end'        - pointer to the end of the registers argument
   1.201 + *                buffer (it is guaranteed to be 8-bytes aligned)
   1.202 + */
   1.203 +
   1.204 +static inline void copy_word(uint32_t* &ireg_args,
   1.205 +                             uint32_t* &stack_args,
   1.206 +                             uint32_t* end,
   1.207 +                             uint32_t  data)
   1.208 +{
   1.209 +  if (ireg_args < end) {
   1.210 +    *ireg_args = data;
   1.211 +    ireg_args++;
   1.212 +  } else {
   1.213 +    *stack_args = data;
   1.214 +    stack_args++;
   1.215 +  }
   1.216 +}
   1.217 +
   1.218 +static inline void copy_dword(uint32_t* &ireg_args,
   1.219 +                              uint32_t* &stack_args,
   1.220 +                              uint32_t* end,
   1.221 +                              uint64_t  data)
   1.222 +{
   1.223 +  if (ireg_args + 1 < end) {
   1.224 +    if ((uint32_t)ireg_args & 4) {
   1.225 +      ireg_args++;
   1.226 +    }
   1.227 +    *(uint64_t *)ireg_args = data;
   1.228 +    ireg_args += 2;
   1.229 +  } else {
   1.230 +    if ((uint32_t)stack_args & 4) {
   1.231 +      stack_args++;
   1.232 +    }
   1.233 +    *(uint64_t *)stack_args = data;
   1.234 +    stack_args += 2;
   1.235 +  }
   1.236 +}
   1.237 +
   1.238 +/*
   1.239 + * Allocation of floating point arguments to VFP registers (s0-s15, d0-d7).
   1.240 + *
   1.241 + * Unlike integer registers allocation, "back-filling" needs to be
   1.242 + * supported. For example, the third floating point argument in the
   1.243 + * following function is going to be allocated to s1 register, back-filling
   1.244 + * the "hole":
   1.245 + *    void f(float s0, double d1, float s1)
   1.246 + *
   1.247 + * Refer to the "Procedure Call Standard for the ARM Architecture" document
   1.248 + * for more details.
   1.249 + *
   1.250 + * 'vfp_s_args' - pointer to the current position in the buffer with
   1.251 + *                the next unallocated single precision register
   1.252 + * 'vfp_d_args' - pointer to the current position in the buffer with
   1.253 + *                the next unallocated double precision register,
   1.254 + *                it has the same value as 'vfp_s_args' when back-filling
   1.255 + *                is not used
   1.256 + * 'end'        - pointer to the end of the vfp registers argument
   1.257 + *                buffer (it is guaranteed to be 8-bytes aligned)
   1.258 + *
   1.259 + * Mozilla bugtracker has a test program attached which be used for
   1.260 + * experimenting with VFP registers allocation code and testing its
   1.261 + * correctness:
   1.262 + * https://bugzilla.mozilla.org/show_bug.cgi?id=601914#c19
   1.263 + */
   1.264 +
   1.265 +static inline bool copy_vfp_single(float* &vfp_s_args, double* &vfp_d_args,
   1.266 +                                   float* end, float data)
   1.267 +{
   1.268 +  if (vfp_s_args >= end)
   1.269 +    return false;
   1.270 +
   1.271 +  *vfp_s_args = data;
   1.272 +  vfp_s_args++;
   1.273 +  if (vfp_s_args < (float *)vfp_d_args) {
   1.274 +    // It was the case of back-filling, now the next free single precision
   1.275 +    // register should overlap with the next free double precision register
   1.276 +    vfp_s_args = (float *)vfp_d_args;
   1.277 +  } else if (vfp_s_args > (float *)vfp_d_args) {
   1.278 +    // also update the pointer to the next free double precision register
   1.279 +    vfp_d_args++;
   1.280 +  }
   1.281 +  return true;
   1.282 +}
   1.283 +
   1.284 +static inline bool copy_vfp_double(float* &vfp_s_args, double* &vfp_d_args,
   1.285 +                                   float* end, double data)
   1.286 +{
   1.287 +  if (vfp_d_args >= (double *)end) {
   1.288 +    // The back-filling continues only so long as no VFP CPRC has been
   1.289 +    // allocated to a slot on the stack. Basically no VFP registers can
   1.290 +    // be allocated after this point.
   1.291 +    vfp_s_args = end;
   1.292 +    return false;
   1.293 +  }
   1.294 +
   1.295 +  if (vfp_s_args == (float *)vfp_d_args) {
   1.296 +    // also update the pointer to the next free single precision register
   1.297 +    vfp_s_args += 2;
   1.298 +  }
   1.299 +  *vfp_d_args = data;
   1.300 +  vfp_d_args++;
   1.301 +  return true;
   1.302 +}
   1.303 +
   1.304 +static void
   1.305 +invoke_copy_to_stack(uint32_t* stk, uint32_t *end,
   1.306 +                     uint32_t paramCount, nsXPTCVariant* s)
   1.307 +{
   1.308 +  uint32_t *ireg_args  = end - 3;
   1.309 +  float    *vfp_s_args = (float *)end;
   1.310 +  double   *vfp_d_args = (double *)end;
   1.311 +  float    *vfp_end    = vfp_s_args + 16;
   1.312 +
   1.313 +  for (uint32_t i = 0; i < paramCount; i++, s++) {
   1.314 +    if (s->IsPtrData()) {
   1.315 +      copy_word(ireg_args, stk, end, (uint32_t)s->ptr);
   1.316 +      continue;
   1.317 +    }
   1.318 +    // According to the ARM EABI, integral types that are smaller than a word
   1.319 +    // are to be sign/zero-extended to a full word and treated as 4-byte values
   1.320 +    switch (s->type)
   1.321 +    {
   1.322 +      case nsXPTType::T_FLOAT:
   1.323 +        if (!copy_vfp_single(vfp_s_args, vfp_d_args, vfp_end, s->val.f)) {
   1.324 +          copy_word(end, stk, end, reinterpret_cast<uint32_t&>(s->val.f));
   1.325 +        }
   1.326 +        break;
   1.327 +      case nsXPTType::T_DOUBLE:
   1.328 +        if (!copy_vfp_double(vfp_s_args, vfp_d_args, vfp_end, s->val.d)) {
   1.329 +          copy_dword(end, stk, end, reinterpret_cast<uint64_t&>(s->val.d));
   1.330 +        }
   1.331 +        break;
   1.332 +      case nsXPTType::T_I8:  copy_word(ireg_args, stk, end, s->val.i8);   break;
   1.333 +      case nsXPTType::T_I16: copy_word(ireg_args, stk, end, s->val.i16);  break;
   1.334 +      case nsXPTType::T_I32: copy_word(ireg_args, stk, end, s->val.i32);  break;
   1.335 +      case nsXPTType::T_I64: copy_dword(ireg_args, stk, end, s->val.i64); break;
   1.336 +      case nsXPTType::T_U8:  copy_word(ireg_args, stk, end, s->val.u8);   break;
   1.337 +      case nsXPTType::T_U16: copy_word(ireg_args, stk, end, s->val.u16);  break;
   1.338 +      case nsXPTType::T_U32: copy_word(ireg_args, stk, end, s->val.u32);  break;
   1.339 +      case nsXPTType::T_U64: copy_dword(ireg_args, stk, end, s->val.u64); break;
   1.340 +      case nsXPTType::T_BOOL: copy_word(ireg_args, stk, end, s->val.b);   break;
   1.341 +      case nsXPTType::T_CHAR: copy_word(ireg_args, stk, end, s->val.c);   break;
   1.342 +      case nsXPTType::T_WCHAR: copy_word(ireg_args, stk, end, s->val.wc); break;
   1.343 +      default:
   1.344 +        // all the others are plain pointer types
   1.345 +        copy_word(ireg_args, stk, end, reinterpret_cast<uint32_t>(s->val.p));
   1.346 +        break;
   1.347 +    }
   1.348 +  }
   1.349 +}
   1.350 +
   1.351 +typedef uint32_t (*vtable_func)(nsISupports *, uint32_t, uint32_t, uint32_t);
   1.352 +
   1.353 +EXPORT_XPCOM_API(nsresult)
   1.354 +NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
   1.355 +                   uint32_t paramCount, nsXPTCVariant* params)
   1.356 +{
   1.357 +  vtable_func *vtable = *reinterpret_cast<vtable_func **>(that);
   1.358 +  vtable_func func = vtable[methodIndex];
   1.359 +  // 'register uint32_t result asm("r0")' could be used here, but it does not
   1.360 +  //  seem to be reliable in all cases: http://gcc.gnu.org/PR46164
   1.361 +  nsresult result;
   1.362 +  asm (
   1.363 +    "mov    r3, sp\n"
   1.364 +    "mov    %[stack_space_size], %[param_count_plus_2], lsl #3\n"
   1.365 +    "tst    r3, #4\n" /* check stack alignment */
   1.366 +
   1.367 +    "add    %[stack_space_size], #(4 * 16)\n" /* space for VFP registers */
   1.368 +    "mov    r3, %[params]\n"
   1.369 +
   1.370 +    "it     ne\n"
   1.371 +    "addne  %[stack_space_size], %[stack_space_size], #4\n"
   1.372 +    "sub    r0, sp, %[stack_space_size]\n" /* allocate space on stack */
   1.373 +
   1.374 +    "sub    r2, %[param_count_plus_2], #2\n"
   1.375 +    "mov    sp, r0\n"
   1.376 +
   1.377 +    "add    r1, r0, %[param_count_plus_2], lsl #3\n"
   1.378 +    "blx    %[invoke_copy_to_stack]\n"
   1.379 +
   1.380 +    "add    ip, sp, %[param_count_plus_2], lsl #3\n"
   1.381 +    "mov    r0, %[that]\n"
   1.382 +    "ldmdb  ip, {r1, r2, r3}\n"
   1.383 +    "vldm   ip, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
   1.384 +    "blx    %[func]\n"
   1.385 +
   1.386 +    "add    sp, sp, %[stack_space_size]\n" /* cleanup stack */
   1.387 +    "mov    %[stack_space_size], r0\n" /* it's actually 'result' variable */
   1.388 +    : [stack_space_size]     "=&r" (result)
   1.389 +    : [func]                 "r"   (func),
   1.390 +      [that]                 "r"   (that),
   1.391 +      [params]               "r"   (params),
   1.392 +      [param_count_plus_2]   "r"   (paramCount + 2),
   1.393 +      [invoke_copy_to_stack] "r"   (invoke_copy_to_stack)
   1.394 +    : "cc", "memory",
   1.395 +      // Mark all the scratch registers as clobbered because they may be
   1.396 +      // modified by the functions, called from this inline assembly block
   1.397 +      "r0", "r1", "r2", "r3", "ip", "lr",
   1.398 +      "d0",  "d1",  "d2",  "d3",  "d4",  "d5",  "d6",  "d7",
   1.399 +      // Also unconditionally mark d16-d31 registers as clobbered even though
   1.400 +      // they actually don't exist in vfpv2 and vfpv3-d16 variants. There is
   1.401 +      // no way to identify VFP variant using preprocessor at the momemnt
   1.402 +      // (see http://gcc.gnu.org/PR46128 for more details), but fortunately
   1.403 +      // current versions of gcc do not seem to complain about these registers
   1.404 +      // even when this code is compiled with '-mfpu=vfpv3-d16' option.
   1.405 +      // If gcc becomes more strict in the future and/or provides a way to
   1.406 +      // identify VFP variant, the following d16-d31 registers list needs
   1.407 +      // to be wrapped into some #ifdef
   1.408 +      "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
   1.409 +      "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"
   1.410 +  );
   1.411 +  return result;
   1.412 +}
   1.413 +
   1.414 +#endif

mercurial