|
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ |
|
2 /* This Source Code Form is subject to the terms of the Mozilla Public |
|
3 * License, v. 2.0. If a copy of the MPL was not distributed with this |
|
4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
|
5 |
|
6 /* Platform specific code to invoke XPCOM methods on native objects */ |
|
7 |
|
8 #include "xptcprivate.h" |
|
9 |
|
10 #include "mozilla/Compiler.h" |
|
11 |
|
12 #if !defined(__arm__) && !(defined(LINUX) || defined(ANDROID)) |
|
13 #error "This code is for Linux ARM only. Check that it works on your system, too.\nBeware that this code is highly compiler dependent." |
|
14 #endif |
|
15 |
|
16 #if MOZ_IS_GCC |
|
17 #if MOZ_GCC_VERSION_AT_LEAST(4, 5, 0) \ |
|
18 && defined(__ARM_EABI__) && !defined(__ARM_PCS_VFP) && !defined(__ARM_PCS) |
|
19 #error "Can't identify floating point calling conventions.\nPlease ensure that your toolchain defines __ARM_PCS or __ARM_PCS_VFP." |
|
20 #endif |
|
21 #endif |
|
22 |
|
23 #ifndef __ARM_PCS_VFP |
|
24 |
|
/* Copies the 64-bit value pointed to by dw into the buffer delimited by
 * start and end, at (or just after) current, wrapping around the buffer
 * boundaries and/or aligning the destination on a 64-bit boundary (for EABI)
 * as needed.
 *
 * start   - first 32-bit word of the buffer (assumed to be 64-bit aligned)
 * current - position at which the caller would like the value stored
 * end     - one past the last 32-bit word of the buffer (assumed to be
 *           64-bit aligned)
 * dw      - the 64-bit value to copy
 *
 * Returns a pointer to the second 32-bit word copied (to accommodate
 * the invoke_copy_to_stack loop, which increments the pointer once more).
 */
static uint32_t *
copy_double_word(uint32_t *start, uint32_t *current, uint32_t *end, uint64_t *dw)
{
#ifdef __ARM_EABI__
    /* Aligning the pointer for EABI. The arithmetic is done on uintptr_t so
     * that the pointer value is never truncated, whatever the pointer width
     * of the build is. */
    current = (uint32_t *)(((uintptr_t)current + 7) & ~(uintptr_t)7);
    /* Wrap when reaching the end of the buffer */
    if (current == end) {
        current = start;
    }
#else
    /* On non-EABI, 64-bit values are not aligned and when we reach the end
     * of the buffer, we need to write half of the data at the end, and the
     * other half at the beginning. */
    if (current == end - 1) {
        *current = ((uint32_t*)dw)[0];
        *start = ((uint32_t*)dw)[1];
        return start;
    }
#endif

    *((uint64_t*) current) = *dw;
    return current + 1;
}
|
55 |
|
56 /* See stack_space comment in NS_InvokeByIndex to see why this needs not to |
|
57 * be static on DEBUG builds. */ |
|
58 #ifndef DEBUG |
|
59 static |
|
60 #endif |
|
61 void |
|
62 invoke_copy_to_stack(uint32_t* stk, uint32_t *end, |
|
63 uint32_t paramCount, nsXPTCVariant* s) |
|
64 { |
|
65 /* The stack buffer is 64-bits aligned. The end argument points to its end. |
|
66 * The caller is assumed to create a stack buffer of at least four 32-bits |
|
67 * words. |
|
68 * We use the last three 32-bit words to store the values for r1, r2 and r3 |
|
69 * for the method call, i.e. the first words for arguments passing. |
|
70 */ |
|
71 uint32_t *d = end - 3; |
|
72 for(uint32_t i = 0; i < paramCount; i++, d++, s++) |
|
73 { |
|
74 /* Wrap when reaching the end of the stack buffer */ |
|
75 if (d == end) d = stk; |
|
76 NS_ASSERTION(d >= stk && d < end, |
|
77 "invoke_copy_to_stack is copying outside its given buffer"); |
|
78 if(s->IsPtrData()) |
|
79 { |
|
80 *((void**)d) = s->ptr; |
|
81 continue; |
|
82 } |
|
83 // According to the ARM EABI, integral types that are smaller than a word |
|
84 // are to be sign/zero-extended to a full word and treated as 4-byte values. |
|
85 |
|
86 switch(s->type) |
|
87 { |
|
88 case nsXPTType::T_I8 : *((int32_t*) d) = s->val.i8; break; |
|
89 case nsXPTType::T_I16 : *((int32_t*) d) = s->val.i16; break; |
|
90 case nsXPTType::T_I32 : *((int32_t*) d) = s->val.i32; break; |
|
91 case nsXPTType::T_I64 : |
|
92 d = copy_double_word(stk, d, end, (uint64_t *)&s->val.i64); |
|
93 break; |
|
94 case nsXPTType::T_U8 : *((uint32_t*)d) = s->val.u8; break; |
|
95 case nsXPTType::T_U16 : *((uint32_t*)d) = s->val.u16; break; |
|
96 case nsXPTType::T_U32 : *((uint32_t*)d) = s->val.u32; break; |
|
97 case nsXPTType::T_U64 : |
|
98 d = copy_double_word(stk, d, end, (uint64_t *)&s->val.u64); |
|
99 break; |
|
100 case nsXPTType::T_FLOAT : *((float*) d) = s->val.f; break; |
|
101 case nsXPTType::T_DOUBLE : |
|
102 d = copy_double_word(stk, d, end, (uint64_t *)&s->val.d); |
|
103 break; |
|
104 case nsXPTType::T_BOOL : *((int32_t*) d) = s->val.b; break; |
|
105 case nsXPTType::T_CHAR : *((int32_t*) d) = s->val.c; break; |
|
106 case nsXPTType::T_WCHAR : *((int32_t*) d) = s->val.wc; break; |
|
107 default: |
|
108 // all the others are plain pointer types |
|
109 *((void**)d) = s->val.p; |
|
110 break; |
|
111 } |
|
112 } |
|
113 } |
|
114 |
|
/* Shape used to call the target method: the object pointer followed by three
 * 32-bit words, which stand in for the r1-r3 argument registers (see the
 * comment inside NS_InvokeByIndex). */
typedef nsresult (*vtable_func)(nsISupports *, uint32_t, uint32_t, uint32_t);

/* Invokes the method at slot methodIndex of that's vtable, passing the
 * paramCount arguments described by params. Returns whatever the invoked
 * method returns. */
EXPORT_XPCOM_API(nsresult)
NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
                 uint32_t paramCount, nsXPTCVariant* params)
{

/* This is to call a given method of class that.
 * The parameters are in params, the number is in paramCount.
 * The routine will issue calls to count the number of words
 * required for argument passing and to copy the arguments to
 * the stack.
 * APCS passes the first 3 params in r1-r3 (with exceptions for 64-bits
 * arguments), and the remaining goes onto the stack.
 * We allocate a buffer on the stack for a "worst case" estimate of how much
 * stack might be needed for EABI, i.e. twice the number of parameters.
 * The end of this buffer will be used to store r1 to r3, so that the start
 * of the stack is the remaining parameters.
 * The magic here is to call the method with "that" and three 32-bits
 * arguments corresponding to r1-r3, so that the compiler generates the
 * proper function call. The stack will also contain the remaining arguments.
 *
 * !!! IMPORTANT !!!
 * This routine makes assumptions about the vtable layout of the c++ compiler. It's implemented
 * for arm-linux GNU g++ >= 2.8.1 (including egcs and gcc-2.95.[1-3])!
 *
 */

  vtable_func *vtable, func;
  /* Never less than 2, so that the buffer allocated below (base_size * 8
   * bytes, i.e. base_size * 2 words) always holds the four 32-bit words
   * that invoke_copy_to_stack expects as a minimum. */
  int base_size = (paramCount > 1) ? paramCount : 2;

/* !!! IMPORTANT !!!
 * On DEBUG builds, the NS_ASSERTION used in invoke_copy_to_stack needs to use
 * the stack to pass the 5th argument to NS_DebugBreak. When invoke_copy_to_stack
 * is inlined, this can result, depending on the compiler and flags, in the
 * stack pointer not pointing at stack_space when the method is called at the
 * end of this function. More generally, any function call requiring stack
 * allocation of arguments is unsafe to be inlined in this function.
 */
  uint32_t *stack_space = (uint32_t *) __builtin_alloca(base_size * 8);

  /* Second argument is the end of the buffer (base_size * 2 words in). */
  invoke_copy_to_stack(stack_space, &stack_space[base_size * 2],
                       paramCount, params);

  /* The vtable pointer is read from the first word of the object. */
  vtable = *reinterpret_cast<vtable_func **>(that);
  func = vtable[methodIndex];

  /* The last three words of the buffer are the values stored for r1-r3. */
  return func(that, stack_space[base_size * 2 - 3],
                    stack_space[base_size * 2 - 2],
                    stack_space[base_size * 2 - 1]);
}
|
166 |
|
167 #else /* __ARM_PCS_VFP */ |
|
168 |
|
169 /* "Procedure Call Standard for the ARM Architecture" document, sections |
|
170 * "5.5 Parameter Passing" and "6.1.2 Procedure Calling" contain all the |
|
171 * needed information. |
|
172 * |
|
173 * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042d/IHI0042D_aapcs.pdf |
|
174 */ |
|
175 |
|
176 #if defined(__thumb__) && !defined(__thumb2__) |
|
177 #error "Thumb1 is not supported" |
|
178 #endif |
|
179 |
|
180 #ifndef __ARMEL__ |
|
181 #error "Only little endian compatibility was tested" |
|
182 #endif |
|
183 |
|
184 /* |
|
185 * Allocation of integer function arguments initially to registers r1-r3 |
|
186 * and then to stack. Handling of 'this' argument which goes to r0 registers |
|
187 * is handled separately and does not belong to these two inline functions. |
|
188 * |
|
189 * The doubleword arguments are allocated to even:odd |
|
190 * register pairs or get aligned at 8-byte boundary on stack. The "holes" |
|
191 * which may appear as a result of this realignment remain unused. |
|
192 * |
|
193 * 'ireg_args' - pointer to the current position in the buffer, |
|
194 * corresponding to the register arguments |
|
195 * 'stack_args' - pointer to the current position in the buffer, |
|
196 * corresponding to the arguments on stack |
|
197 * 'end' - pointer to the end of the registers argument |
|
198 * buffer (it is guaranteed to be 8-bytes aligned) |
|
199 */ |
|
200 |
|
/* Stores one 32-bit argument word.
 *
 * The word goes to the integer register area while 'ireg_args' has not
 * reached 'end', and to the stack area otherwise. Whichever cursor was
 * used is advanced by one word; the other one is left untouched.
 */
static inline void copy_word(uint32_t* &ireg_args,
                             uint32_t* &stack_args,
                             uint32_t* end,
                             uint32_t data)
{
  // Select the cursor to write through, then store and step it in one go.
  uint32_t* &cursor = (ireg_args < end) ? ireg_args : stack_args;
  *cursor++ = data;
}
|
214 |
|
/* Stores one 64-bit argument.
 *
 * While at least two register words are left before 'end', the value goes
 * to the integer register area, skipping one word first if 'ireg_args' is
 * not 8-byte aligned (even:odd register pair). Otherwise the value goes to
 * the stack area at the next 8-byte aligned position.
 *
 * Once a 64-bit argument has been sent to the stack, the integer registers
 * are considered exhausted: 'ireg_args' is moved to 'end' so that a later
 * 32-bit argument is not placed in a register (r3) that was skipped over.
 * This matches the AAPCS rule that the next core register number becomes r4
 * as soon as an argument is allocated to the stack.
 *
 * Alignment checks are done on uintptr_t so the pointer value is never
 * truncated.
 */
static inline void copy_dword(uint32_t* &ireg_args,
                              uint32_t* &stack_args,
                              uint32_t* end,
                              uint64_t data)
{
  if (ireg_args + 1 < end) {
    if ((uintptr_t)ireg_args & 4) {
      ireg_args++;
    }
    *(uint64_t *)ireg_args = data;
    ireg_args += 2;
  } else {
    // No register pair left: retire any remaining single register.
    ireg_args = end;
    if ((uintptr_t)stack_args & 4) {
      stack_args++;
    }
    *(uint64_t *)stack_args = data;
    stack_args += 2;
  }
}
|
234 |
|
235 /* |
|
236 * Allocation of floating point arguments to VFP registers (s0-s15, d0-d7). |
|
237 * |
|
238 * Unlike integer registers allocation, "back-filling" needs to be |
|
239 * supported. For example, the third floating point argument in the |
|
240 * following function is going to be allocated to s1 register, back-filling |
|
241 * the "hole": |
|
242 * void f(float s0, double d1, float s1) |
|
243 * |
|
244 * Refer to the "Procedure Call Standard for the ARM Architecture" document |
|
245 * for more details. |
|
246 * |
|
247 * 'vfp_s_args' - pointer to the current position in the buffer with |
|
248 * the next unallocated single precision register |
|
249 * 'vfp_d_args' - pointer to the current position in the buffer with |
|
250 * the next unallocated double precision register, |
|
251 * it has the same value as 'vfp_s_args' when back-filling |
|
252 * is not used |
|
253 * 'end' - pointer to the end of the vfp registers argument |
|
254 * buffer (it is guaranteed to be 8-bytes aligned) |
|
255 * |
|
256 * Mozilla bugtracker has a test program attached which can be used for |
|
257 * experimenting with VFP registers allocation code and testing its |
|
258 * correctness: |
|
259 * https://bugzilla.mozilla.org/show_bug.cgi?id=601914#c19 |
|
260 */ |
|
261 |
|
/* Tries to place a single precision argument in the VFP register buffer.
 *
 * Returns false, leaving both cursors untouched, when 'vfp_s_args' has
 * already reached 'end'. Otherwise stores 'data' at 'vfp_s_args', advances
 * it, re-synchronises the two cursors and returns true.
 */
static inline bool copy_vfp_single(float* &vfp_s_args, double* &vfp_d_args,
                                   float* end, float data)
{
  if (vfp_s_args >= end) {
    return false;
  }

  *vfp_s_args++ = data;

  float *const next_double_slot = (float *)vfp_d_args;
  if (vfp_s_args > next_double_slot) {
    // The single just written used part of the next free double precision
    // register, so that register is no longer available as a whole.
    vfp_d_args++;
  } else if (vfp_s_args < next_double_slot) {
    // This was a back-fill: the next free single precision register is
    // now the one overlapping the next free double precision register.
    vfp_s_args = next_double_slot;
  }
  return true;
}
|
280 |
|
/* Tries to place a double precision argument in the VFP register buffer.
 *
 * Returns false when 'vfp_d_args' has reached 'end'; in that case
 * 'vfp_s_args' is also moved to 'end' so that no further VFP register gets
 * allocated. Otherwise stores 'data' at 'vfp_d_args', advances it and
 * returns true.
 */
static inline bool copy_vfp_double(float* &vfp_s_args, double* &vfp_d_args,
                                   float* end, double data)
{
  double *const double_limit = (double *)end;
  if (vfp_d_args >= double_limit) {
    // Back-filling only continues as long as no VFP CPRC has been
    // allocated to a stack slot, so from here on no VFP register may be
    // handed out any more.
    vfp_s_args = end;
    return false;
  }

  // When both cursors sit on the same register there is no hole to
  // back-fill, and the single precision cursor has to follow along.
  const bool cursors_overlap = (vfp_s_args == (float *)vfp_d_args);

  *vfp_d_args++ = data;
  if (cursors_overlap) {
    vfp_s_args += 2;
  }
  return true;
}
|
300 |
|
301 static void |
|
302 invoke_copy_to_stack(uint32_t* stk, uint32_t *end, |
|
303 uint32_t paramCount, nsXPTCVariant* s) |
|
304 { |
|
305 uint32_t *ireg_args = end - 3; |
|
306 float *vfp_s_args = (float *)end; |
|
307 double *vfp_d_args = (double *)end; |
|
308 float *vfp_end = vfp_s_args + 16; |
|
309 |
|
310 for (uint32_t i = 0; i < paramCount; i++, s++) { |
|
311 if (s->IsPtrData()) { |
|
312 copy_word(ireg_args, stk, end, (uint32_t)s->ptr); |
|
313 continue; |
|
314 } |
|
315 // According to the ARM EABI, integral types that are smaller than a word |
|
316 // are to be sign/zero-extended to a full word and treated as 4-byte values |
|
317 switch (s->type) |
|
318 { |
|
319 case nsXPTType::T_FLOAT: |
|
320 if (!copy_vfp_single(vfp_s_args, vfp_d_args, vfp_end, s->val.f)) { |
|
321 copy_word(end, stk, end, reinterpret_cast<uint32_t&>(s->val.f)); |
|
322 } |
|
323 break; |
|
324 case nsXPTType::T_DOUBLE: |
|
325 if (!copy_vfp_double(vfp_s_args, vfp_d_args, vfp_end, s->val.d)) { |
|
326 copy_dword(end, stk, end, reinterpret_cast<uint64_t&>(s->val.d)); |
|
327 } |
|
328 break; |
|
329 case nsXPTType::T_I8: copy_word(ireg_args, stk, end, s->val.i8); break; |
|
330 case nsXPTType::T_I16: copy_word(ireg_args, stk, end, s->val.i16); break; |
|
331 case nsXPTType::T_I32: copy_word(ireg_args, stk, end, s->val.i32); break; |
|
332 case nsXPTType::T_I64: copy_dword(ireg_args, stk, end, s->val.i64); break; |
|
333 case nsXPTType::T_U8: copy_word(ireg_args, stk, end, s->val.u8); break; |
|
334 case nsXPTType::T_U16: copy_word(ireg_args, stk, end, s->val.u16); break; |
|
335 case nsXPTType::T_U32: copy_word(ireg_args, stk, end, s->val.u32); break; |
|
336 case nsXPTType::T_U64: copy_dword(ireg_args, stk, end, s->val.u64); break; |
|
337 case nsXPTType::T_BOOL: copy_word(ireg_args, stk, end, s->val.b); break; |
|
338 case nsXPTType::T_CHAR: copy_word(ireg_args, stk, end, s->val.c); break; |
|
339 case nsXPTType::T_WCHAR: copy_word(ireg_args, stk, end, s->val.wc); break; |
|
340 default: |
|
341 // all the others are plain pointer types |
|
342 copy_word(ireg_args, stk, end, reinterpret_cast<uint32_t>(s->val.p)); |
|
343 break; |
|
344 } |
|
345 } |
|
346 } |
|
347 |
|
/* Type of the entries read from the object's vtable. The assembly below
 * calls the selected entry directly, loading r0-r3 and d0-d7 by hand. */
typedef uint32_t (*vtable_func)(nsISupports *, uint32_t, uint32_t, uint32_t);

/* Invokes the method at slot methodIndex of that's vtable with the
 * paramCount arguments described by params, and returns the value the
 * method leaves in r0.
 *
 * Outline of the assembly block:
 *   1. Compute the size of the argument area: 8 bytes per (paramCount + 2),
 *      plus 4 * 16 bytes for the VFP registers, plus 4 more bytes if sp has
 *      bit 2 set (the "check stack alignment" test).
 *   2. Move sp down by that size and call
 *      invoke_copy_to_stack(r0 = new sp, r1 = new sp + (paramCount + 2) * 8,
 *                           r2 = paramCount, r3 = params).
 *   3. With ip = sp + (paramCount + 2) * 8: load r1-r3 from the three words
 *      just below ip, load d0-d7 from the 64 bytes starting at ip, put
 *      'that' in r0 and call the method.
 *   4. Restore sp and copy r0 into 'result'.
 * The %[stack_space_size] operand is bound to 'result': it holds the size of
 * the argument area until the very last instruction, which overwrites it
 * with the return value.
 */
EXPORT_XPCOM_API(nsresult)
NS_InvokeByIndex(nsISupports* that, uint32_t methodIndex,
                 uint32_t paramCount, nsXPTCVariant* params)
{
  /* The vtable pointer is read from the first word of the object. */
  vtable_func *vtable = *reinterpret_cast<vtable_func **>(that);
  vtable_func func = vtable[methodIndex];
  // 'register uint32_t result asm("r0")' could be used here, but it does not
  // seem to be reliable in all cases: http://gcc.gnu.org/PR46164
  nsresult result;
  asm (
    "mov r3, sp\n"
    "mov %[stack_space_size], %[param_count_plus_2], lsl #3\n"
    "tst r3, #4\n" /* check stack alignment */

    "add %[stack_space_size], #(4 * 16)\n" /* space for VFP registers */
    "mov r3, %[params]\n"

    "it ne\n"
    "addne %[stack_space_size], %[stack_space_size], #4\n"
    "sub r0, sp, %[stack_space_size]\n" /* allocate space on stack */

    "sub r2, %[param_count_plus_2], #2\n"
    "mov sp, r0\n"

    "add r1, r0, %[param_count_plus_2], lsl #3\n"
    "blx %[invoke_copy_to_stack]\n"

    "add ip, sp, %[param_count_plus_2], lsl #3\n"
    "mov r0, %[that]\n"
    "ldmdb ip, {r1, r2, r3}\n"
    "vldm ip, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
    "blx %[func]\n"

    "add sp, sp, %[stack_space_size]\n" /* cleanup stack */
    "mov %[stack_space_size], r0\n" /* it's actually 'result' variable */
    : [stack_space_size] "=&r" (result)
    : [func] "r" (func),
      [that] "r" (that),
      [params] "r" (params),
      [param_count_plus_2] "r" (paramCount + 2),
      [invoke_copy_to_stack] "r" (invoke_copy_to_stack)
    : "cc", "memory",
      // Mark all the scratch registers as clobbered because they may be
      // modified by the functions, called from this inline assembly block
      "r0", "r1", "r2", "r3", "ip", "lr",
      "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
      // Also unconditionally mark d16-d31 registers as clobbered even though
      // they actually don't exist in vfpv2 and vfpv3-d16 variants. There is
      // no way to identify VFP variant using preprocessor at the moment
      // (see http://gcc.gnu.org/PR46128 for more details), but fortunately
      // current versions of gcc do not seem to complain about these registers
      // even when this code is compiled with '-mfpu=vfpv3-d16' option.
      // If gcc becomes more strict in the future and/or provides a way to
      // identify VFP variant, the following d16-d31 registers list needs
      // to be wrapped into some #ifdef
      "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
      "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"
  );
  return result;
}
|
410 |
|
411 #endif |