Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1,
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f, for hacking purposes.
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

.arch armv7-a
.fpu neon
/* Allow building on targets that do not support NEON, and force the object
 * file target to avoid bumping the final binary's target. */
.object_arch armv4t
.text
.align

.balign 64
YCbCr42xToRGB565_DITHER03_CONSTS_NEON:
.short -14240
.short -14240+384
.short 8672
.short 8672+192
.short -17696
.short -17696+384
.byte 102
.byte 25
.byte 52
.byte 129
YCbCr42xToRGB565_DITHER12_CONSTS_NEON:
.short -14240+128
.short -14240+256
.short 8672+64
.short 8672+128
.short -17696+128
.short -17696+256
.byte 102
.byte 25
.byte 52
.byte 129
YCbCr42xToRGB565_DITHER21_CONSTS_NEON:
.short -14240+256
.short -14240+128
.short 8672+128
.short 8672+64
.short -17696+256
.short -17696+128
.byte 102
.byte 25
.byte 52
.byte 129
YCbCr42xToRGB565_DITHER30_CONSTS_NEON:
.short -14240+384
.short -14240
.short 8672+192
.short 8672
.short -17696+384
.short -17696
.byte 102
.byte 25
.byte 52
.byte 129
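@ Layout of each table: three pairs of 16-bit biases (R, G, B), then the
@ four 8-bit coefficient bytes 102, 25, 52, 129. The biases fold the
@ BT.601 offsets (Y'-16, C-128) into the multiply-accumulates used below:
@   bias_R = -(74*16 + 102*128)       = -14240
@   bias_G = -74*16 + 25*128 + 52*128 =   8672
@   bias_B = -(74*16 + 129*128)       = -17696
@ The two entries of each pair add the ordered-dither offsets for even and
@ odd pixels (the G offsets are half the R/B ones, since green keeps an
@ extra bit in RGB565); the four tables are the four dither row patterns.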

@ void ScaleYCbCr42xToRGB565_BilinearY_Row_NEON(
@  yuv2rgb565_row_scale_bilinear_ctx *ctx, int dither);
@
@ ctx = {
@   uint16_t *rgb_row;       /*r0*/
@   const uint8_t *y_row;    /*r1*/
@   const uint8_t *u_row;    /*r2*/
@   const uint8_t *v_row;    /*r3*/
@   int y_yweight;           /*r4*/
@   int y_pitch;             /*r5*/
@   int width;               /*r6*/
@   int source_x0_q16;       /*r7*/
@   int source_dx_q16;       /*r8*/
@   int source_uv_xoffs_q16; /*r9*/
@ };
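@ Scalar sketch of what one output pixel i works out to (illustrative
@ only; clamp and pack565 are pseudo-helpers, q16 fields are 16.16
@ fixed point, and chroma is sampled nearest as the code below does):
@   x  = source_x0_q16 + i*source_dx_q16;
@   xi = x >> 16;            /* integer source column   */
@   xw = (x >> 8) & 0xFF;    /* 8-bit horizontal weight */
@   t0 = y_row[xi];           t1 = y_row[xi+1];
@   b0 = y_row[y_pitch+xi];   b1 = y_row[y_pitch+xi+1];
@   t0 += (b0-t0)*y_yweight+128 >> 8;  t1 += (b1-t1)*y_yweight+128 >> 8;
@   y  = t0 + ((t1-t0)*xw+128 >> 8);
@   cx = x + source_uv_xoffs_q16 >> 17; /* nearest chroma sample */
@   cb = u_row[cx];  cr = v_row[cx];
@   r = clamp(74*y + 102*cr + bias_R);  /* biases include dither */
@   g = clamp(74*y -  25*cb -  52*cr + bias_G);
@   b = clamp(74*y + 129*cb + bias_B);
@   *rgb_row++ = pack565(r, g, b);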
.global ScaleYCbCr42xToRGB565_BilinearY_Row_NEON
.type ScaleYCbCr42xToRGB565_BilinearY_Row_NEON, %function
.balign 64
.fnstart
ScaleYCbCr42xToRGB565_BilinearY_Row_NEON:
STMFD r13!,{r4-r9,r14}        @ 8 words.
ADR r14,YCbCr42xToRGB565_DITHER03_CONSTS_NEON
VPUSH {Q4-Q7}                 @ 16 words.
ADD r14,r14,r1, LSL #4        @ Select the dither table to use
LDMIA r0, {r0-r9}
@ Set up image index registers.
ADD r12,r8, r8
VMOV.I32 D16,#0               @ Q8 = < 2| 2| 0| 0>*source_dx_q16
VDUP.32 D17,r12
ADD r12,r12,r12
VTRN.32 D16,D17               @ Q8 = < 2| 0| 2| 0>*source_dx_q16
VDUP.32 D19,r12               @ Q9 = < 4| 4| ?| ?>*source_dx_q16
ADD r12,r12,r12
VDUP.32 Q0, r7                @ Q0 = < 1| 1| 1| 1>*source_x0_q16
VADD.I32 D17,D17,D19          @ Q8 = < 6| 4| 2| 0>*source_dx_q16
CMP r8, #0                    @ If source_dx_q16 is negative...
VDUP.32 Q9, r12               @ Q9 = < 8| 8| 8| 8>*source_dx_q16
ADDLT r7, r7, r8, LSL #4      @ Make r7 point to the end of the block
VADD.I32 Q0, Q0, Q8           @ Q0 = < 6| 4| 2| 0>*source_dx_q16+source_x0_q16
SUBLT r7, r7, r8              @ (i.e., the lowest address we'll use)
VADD.I32 Q1, Q0, Q9           @ Q1 = <14|12|10| 8>*source_dx_q16+source_x0_q16
VDUP.I32 Q9, r8               @ Q9 = < 1| 1| 1| 1>*source_dx_q16
VADD.I32 Q2, Q0, Q9           @ Q2 = < 7| 5| 3| 1>*source_dx_q16+source_x0_q16
VADD.I32 Q3, Q1, Q9           @ Q3 = <15|13|11| 9>*source_dx_q16+source_x0_q16
VLD1.64 {D30,D31},[r14,:128]  @ Load some constants
VMOV.I8 D28,#52
VMOV.I8 D29,#129
@ The basic idea here is to do aligned loads of a block of data and then
@ index into it using VTBL to extract the data from the source X
@ coordinate corresponding to each destination pixel.
@ This is significantly less code and significantly fewer cycles than doing
@ a series of single-lane loads, but it means that the X step between
@ pixels must be limited to 2.0 or less, otherwise we couldn't guarantee
@ that we could read 8 pixels from a single aligned 32-byte block of data.
@ Q0...Q3 contain the 16.16 fixed-point X coordinates of each pixel,
@ separated into even pixels and odd pixels to make extracting offsets and
@ weights easier.
@ We then pull out two bytes from the middle of each coordinate: the top
@ byte corresponds to the integer part of the X coordinate, and the bottom
@ byte corresponds to the weight to use for bilinear blending.
@ These are separated out into different registers with VTRN.
@ Then by subtracting the integer X coordinate of the first pixel in the
@ data block we loaded, we produce an index register suitable for use by
@ VTBL.
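@ Concretely, for each 32-bit coordinate x, the VRSHRN #8 / VTRN.8 /
@ VSUB.S8 sequence in the loop amounts to (a sketch; block_base is the
@ 16-byte-aligned source offset the VLD1 reads from):
@   mid16  = (x + 128) >> 8;             /* VRSHRN.S32 #8            */
@   weight = mid16 & 0xFF;               /* low byte:  blend weight  */
@   xint   = (mid16 >> 8) & 0xFF;        /* high byte: integer X     */
@   index  = xint - (block_base & 0xFF); /* VSUB.S8; the VTBL operand */
@ Given dx <= 2.0, index always falls inside the 32 bytes loaded.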
s42xbily_neon_loop:
@ Load the Y' data.
MOV r12,r7, ASR #16
VRSHRN.S32 D16,Q0, #8
AND r12,r12,#~15              @ Read 16-byte aligned blocks
VDUP.I8 D20,r12
ADD r12,r1, r12               @ r12 = y_row+(source_x&~15)
VRSHRN.S32 D17,Q1, #8
PLD [r12,#64]
VLD1.64 {D8, D9, D10,D11},[r12,:128],r5  @ Load Y' top row
ADD r14,r7, r8, LSL #3
VRSHRN.S32 D18,Q2, #8
MOV r14,r14,ASR #16
VRSHRN.S32 D19,Q3, #8
AND r14,r14,#~15              @ Read 16-byte aligned blocks
VLD1.64 {D12,D13,D14,D15},[r12,:128]     @ Load Y' bottom row
PLD [r12,#64]
VDUP.I8 D21,r14
ADD r14,r1, r14               @ r14 = y_row+(source_x&~15)
VMOV.I8 Q13,#1
PLD [r14,#64]
VTRN.8 Q8, Q9                 @ Q8 = <wFwEwDwCwBwAw9w8w7w6w5w4w3w2w1w0>
                              @ Q9 = <xFxExDxCxBxAx9x8x7x6x5x4x3x2x1x0>
VSUB.S8 Q9, Q9, Q10           @ Make offsets relative to the data we loaded.
@ First 8 Y' pixels
VTBL.8 D20,{D8, D9, D10,D11},D18   @ Index top row at source_x
VTBL.8 D24,{D12,D13,D14,D15},D18   @ Index bottom row at source_x
VADD.S8 Q13,Q9, Q13                @ Add 1 to source_x
VTBL.8 D22,{D8, D9, D10,D11},D26   @ Index top row at source_x+1
VTBL.8 D26,{D12,D13,D14,D15},D26   @ Index bottom row at source_x+1
@ Next 8 Y' pixels
VLD1.64 {D8, D9, D10,D11},[r14,:128],r5  @ Load Y' top row
VLD1.64 {D12,D13,D14,D15},[r14,:128]     @ Load Y' bottom row
PLD [r14,#64]
VTBL.8 D21,{D8, D9, D10,D11},D19   @ Index top row at source_x
VTBL.8 D25,{D12,D13,D14,D15},D19   @ Index bottom row at source_x
VTBL.8 D23,{D8, D9, D10,D11},D27   @ Index top row at source_x+1
VTBL.8 D27,{D12,D13,D14,D15},D27   @ Index bottom row at source_x+1
@ Blend Y'.
VDUP.I16 Q9, r4               @ Load the y weights.
VSUBL.U8 Q4, D24,D20          @ Q5:Q4 = c-a
VSUBL.U8 Q5, D25,D21
VSUBL.U8 Q6, D26,D22          @ Q7:Q6 = d-b
VSUBL.U8 Q7, D27,D23
VMUL.S16 Q4, Q4, Q9           @ Q5:Q4 = (c-a)*yweight
VMUL.S16 Q5, Q5, Q9
VMUL.S16 Q6, Q6, Q9           @ Q7:Q6 = (d-b)*yweight
VMUL.S16 Q7, Q7, Q9
VMOVL.U8 Q12,D16              @ Promote the x weights to 16 bits.
VMOVL.U8 Q13,D17              @ Sadly, there's no VMULW.
VRSHRN.S16 D8, Q4, #8         @ Q4 = (c-a)*yweight+128>>8
VRSHRN.S16 D9, Q5, #8
VRSHRN.S16 D12,Q6, #8         @ Q6 = (d-b)*yweight+128>>8
VRSHRN.S16 D13,Q7, #8
VADD.I8 Q10,Q10,Q4            @ Q10 = a+((c-a)*yweight+128>>8)
VADD.I8 Q11,Q11,Q6            @ Q11 = b+((d-b)*yweight+128>>8)
VSUBL.U8 Q4, D22,D20          @ Q5:Q4 = b-a
VSUBL.U8 Q5, D23,D21
VMUL.S16 Q4, Q4, Q12          @ Q5:Q4 = (b-a)*xweight
VMUL.S16 Q5, Q5, Q13
VRSHRN.S16 D8, Q4, #8         @ Q4 = (b-a)*xweight+128>>8
ADD r12,r7, r9
VRSHRN.S16 D9, Q5, #8
MOV r12,r12,ASR #17
VADD.I8 Q8, Q10,Q4            @ Q8 = a+((b-a)*xweight+128>>8)
@ Start extracting the chroma x coordinates, and load Cb and Cr.
AND r12,r12,#~15              @ Read 16-byte aligned blocks
VDUP.I32 Q9, r9               @ Q9 = source_uv_xoffs_q16 x 4
ADD r14,r2, r12
VADD.I32 Q10,Q0, Q9
VLD1.64 {D8, D9, D10,D11},[r14,:128]     @ Load Cb
PLD [r14,#64]
VADD.I32 Q11,Q1, Q9
ADD r14,r3, r12
VADD.I32 Q12,Q2, Q9
VLD1.64 {D12,D13,D14,D15},[r14,:128]     @ Load Cr
PLD [r14,#64]
VADD.I32 Q13,Q3, Q9
VRSHRN.S32 D20,Q10,#9         @ Q10 = <xEwExCwCxAwAx8w8x6w6x4w4x2w2x0w0>
VRSHRN.S32 D21,Q11,#9
VDUP.I8 Q9, r12
VRSHRN.S32 D22,Q12,#9         @ Q11 = <xFwFxDwDxBwBx9w9x7w7x5w5x3w3x1w1>
VRSHRN.S32 D23,Q13,#9
@ We don't actually need the x weights, but we get them for free.
@ Free ALU slot
VTRN.8 Q10,Q11                @ Q10 = <wFwEwDwCwBwAw9w8w7w6w5w4w3w2w1w0>
@ Free ALU slot               @ Q11 = <xFxExDxCxBxAx9x8x7x6x5x4x3x2x1x0>
VSUB.S8 Q11,Q11,Q9            @ Make offsets relative to the data we loaded.
VTBL.8 D18,{D8, D9, D10,D11},D22   @ Index Cb at source_x
VMOV.I8 D24,#74
VTBL.8 D19,{D8, D9, D10,D11},D23
VMOV.I8 D26,#102
VTBL.8 D20,{D12,D13,D14,D15},D22   @ Index Cr at source_x
VMOV.I8 D27,#25
VTBL.8 D21,{D12,D13,D14,D15},D23
@ We now have Y' in Q8, Cb in Q9, and Cr in Q10
@ We use VDUP to expand constants, because it's a permute instruction, so
@ it can dual issue on the A8.
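@ From here the per-pixel math, in 16-bit lanes, is the BT.601 conversion
@ with coefficients scaled by 64 and the dithered biases loaded above:
@   R = 74*Y' + 102*Cr           + bias_R
@   G = 74*Y' -  25*Cb -  52*Cr  + bias_G
@   B = 74*Y' + 129*Cb           + bias_B
@ Each result is the 8-bit channel value times 64, plus the dither.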
SUBS r6, r6, #16              @ width -= 16
VMULL.U8 Q4, D16,D24          @ Q5:Q4 = Y'*74
VDUP.32 Q6, D30[1]            @ Q7:Q6 = bias_G
VMULL.U8 Q5, D17,D24
VDUP.32 Q7, D30[1]
VMLSL.U8 Q6, D18,D27          @ Q7:Q6 = -25*Cb+bias_G
VDUP.32 Q11,D30[0]            @ Q12:Q11 = bias_R
VMLSL.U8 Q7, D19,D27
VDUP.32 Q12,D30[0]
VMLAL.U8 Q11,D20,D26          @ Q12:Q11 = 102*Cr+bias_R
VDUP.32 Q8, D31[0]            @ Q13:Q8 = bias_B
VMLAL.U8 Q12,D21,D26
VDUP.32 Q13,D31[0]
VMLAL.U8 Q8, D18,D29          @ Q13:Q8 = 129*Cb+bias_B
VMLAL.U8 Q13,D19,D29
VMLSL.U8 Q6, D20,D28          @ Q7:Q6 = -25*Cb-52*Cr+bias_G
VMLSL.U8 Q7, D21,D28
VADD.S16 Q11,Q4, Q11          @ Q12:Q11 = 74*Y'+102*Cr+bias_R
VADD.S16 Q12,Q5, Q12
VQADD.S16 Q8, Q4, Q8          @ Q13:Q8 = 74*Y'+129*Cb+bias_B
VQADD.S16 Q13,Q5, Q13
VADD.S16 Q6, Q4, Q6           @ Q7:Q6 = 74*Y'-25*Cb-52*Cr+bias_G
VADD.S16 Q7, Q5, Q7
@ Push each value to the top of its word and saturate it.
VQSHLU.S16 Q11,Q11,#2
VQSHLU.S16 Q12,Q12,#2
VQSHLU.S16 Q6, Q6, #2
VQSHLU.S16 Q7, Q7, #2
VQSHLU.S16 Q8, Q8, #2
VQSHLU.S16 Q13,Q13,#2
@ Merge G and B into R.
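@ After VQSHLU #2 each channel sits, saturated to unsigned, in bits 15..8
@ of its lane. VSRI #5 inserts G into bits 10..3, then VSRI #11 inserts B
@ into bits 4..0, leaving R[15:11] G[10:5] B[4:0]: an RGB565 pixel.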
VSRI.U16 Q11,Q6, #5
VSRI.U16 Q12,Q7, #5
VSRI.U16 Q11,Q8, #11
MOV r14,r8, LSL #4
VSRI.U16 Q12,Q13,#11
BLT s42xbily_neon_tail
VDUP.I32 Q13,r14
@ Store the result.
VST1.16 {D22,D23,D24,D25},[r0]!
BEQ s42xbily_neon_done
@ Advance the x coordinates.
VADD.I32 Q0, Q0, Q13
VADD.I32 Q1, Q1, Q13
ADD r7, r14
VADD.I32 Q2, Q2, Q13
VADD.I32 Q3, Q3, Q13
B s42xbily_neon_loop
s42xbily_neon_tail:
@ We have between 1 and 15 pixels left to write.
@ -r6 == the number of pixels we need to skip writing.
@ Adjust r0 to point to the last one we need to write, because we're going
@ to write them in reverse order.
ADD r0, r0, r6, LSL #1
MOV r14,#-2
ADD r0, r0, #30
@ Skip past the ones we don't need to write.
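@ Reading PC in ARM state yields the address of the current instruction
@ plus 8, which is exactly the first VST1 below, so subtracting r6<<2
@ (r6 is negative here) jumps forward over -r6 of the 16 single-lane
@ stores. The ORR is a never-executed pad that keeps that arithmetic true.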
SUB PC, PC, r6, LSL #2
ORR r0, r0, r0
VST1.16 {D25[3]},[r0,:16],r14
VST1.16 {D25[2]},[r0,:16],r14
VST1.16 {D25[1]},[r0,:16],r14
VST1.16 {D25[0]},[r0,:16],r14
VST1.16 {D24[3]},[r0,:16],r14
VST1.16 {D24[2]},[r0,:16],r14
VST1.16 {D24[1]},[r0,:16],r14
VST1.16 {D24[0]},[r0,:16],r14
VST1.16 {D23[3]},[r0,:16],r14
VST1.16 {D23[2]},[r0,:16],r14
VST1.16 {D23[1]},[r0,:16],r14
VST1.16 {D23[0]},[r0,:16],r14
VST1.16 {D22[3]},[r0,:16],r14
VST1.16 {D22[2]},[r0,:16],r14
VST1.16 {D22[1]},[r0,:16],r14
VST1.16 {D22[0]},[r0,:16]
s42xbily_neon_done:
VPOP {Q4-Q7}                  @ 16 words.
LDMFD r13!,{r4-r9,PC}         @ 8 words.
.fnend
.size ScaleYCbCr42xToRGB565_BilinearY_Row_NEON, .-ScaleYCbCr42xToRGB565_BilinearY_Row_NEON

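@ Mark the object as not needing an executable stack; without this note,
@ the Linux linker assumes hand-written assembly may require one and makes
@ the whole process stack executable.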
#if defined(__ELF__)&&defined(__linux__)
.section .note.GNU-stack,"",%progbits
#endif