Wed, 31 Dec 2014 06:09:35 +0100
Cloned upstream origin tor-browser at tor-browser-31.3.0esr-4.5-1-build1
revision ID fc1c9ff7c1b2defdbc039f12214767608f46423f for hacking purposes.
michael@0 | 1 | /* |
michael@0 | 2 | * Copyright 2012 The LibYuv Project Authors. All rights reserved. |
michael@0 | 3 | * |
michael@0 | 4 | * Use of this source code is governed by a BSD-style license |
michael@0 | 5 | * that can be found in the LICENSE file in the root of the source |
michael@0 | 6 | * tree. An additional intellectual property rights grant can be found |
michael@0 | 7 | * in the file PATENTS. All contributing project authors may |
michael@0 | 8 | * be found in the AUTHORS file in the root of the source tree. |
michael@0 | 9 | */ |
michael@0 | 10 | |
michael@0 | 11 | #include "libyuv/basic_types.h" |
michael@0 | 12 | |
michael@0 | 13 | #ifdef __cplusplus |
michael@0 | 14 | namespace libyuv { |
michael@0 | 15 | extern "C" { |
michael@0 | 16 | #endif |
michael@0 | 17 | |
michael@0 | 18 | #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) |
michael@0 | 19 | |
// Computes the sum of squared differences between the bytes of src_a and
// src_b using NEON, and returns the low 32 bits of that sum.
//
// src_a, src_b: input byte buffers; both are read 16 bytes at a time.
// count: number of bytes to compare. The loop tests count only after each
//        16-byte step, so at least 16 bytes are read from each buffer and
//        the amount read is count rounded up to a multiple of 16.
//        NOTE(review): callers presumably pass a positive multiple of 16 -
//        confirm against the call sites.
michael@0 | 20 | uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { |
// NOTE(review): 'volatile' on this local looks unnecessary for an asm output
// operand - confirm before removing.
michael@0 | 21 | volatile uint32 sse; |
michael@0 | 22 | asm volatile ( |
// Clear the four accumulators q8-q11 (each is used as four 32-bit lanes).
michael@0 | 23 | "vmov.u8 q8, #0 \n" |
michael@0 | 24 | "vmov.u8 q10, #0 \n" |
michael@0 | 25 | "vmov.u8 q9, #0 \n" |
michael@0 | 26 | "vmov.u8 q11, #0 \n" |
michael@0 | 27 | |
// Align the loop entry to a 4-byte boundary.
michael@0 | 28 | ".p2align 2 \n" |
michael@0 | 29 | "1: \n" |
// Load 16 bytes from each source, post-incrementing both pointers, then
// subtract 16 from the remaining count; subs sets the flags tested by the
// bgt at the bottom of the loop.
michael@0 | 30 | "vld1.8 {q0}, [%0]! \n" |
michael@0 | 31 | "vld1.8 {q1}, [%1]! \n" |
michael@0 | 32 | "subs %2, %2, #16 \n" |
// Widening subtract a - b into 16-bit lanes: q2 takes the low 8 bytes,
// q3 the high 8 bytes.
michael@0 | 33 | "vsubl.u8 q2, d0, d2 \n" |
michael@0 | 34 | "vsubl.u8 q3, d1, d3 \n" |
// Square each difference (treated as signed 16-bit) and accumulate the
// products into the 32-bit lanes of q8-q11.
michael@0 | 35 | "vmlal.s16 q8, d4, d4 \n" |
michael@0 | 36 | "vmlal.s16 q9, d6, d6 \n" |
michael@0 | 37 | "vmlal.s16 q10, d5, d5 \n" |
michael@0 | 38 | "vmlal.s16 q11, d7, d7 \n" |
// Repeat while the remaining count is still greater than zero.
michael@0 | 39 | "bgt 1b \n" |
michael@0 | 40 | |
// Reduction: add the four accumulators together, pairwise-add the 32-bit
// lanes into two 64-bit lanes, add those two lanes, then move the low
// 32 bits of the total into sse.
michael@0 | 41 | "vadd.u32 q8, q8, q9 \n" |
michael@0 | 42 | "vadd.u32 q10, q10, q11 \n" |
michael@0 | 43 | "vadd.u32 q11, q8, q10 \n" |
michael@0 | 44 | "vpaddl.u32 q1, q11 \n" |
michael@0 | 45 | "vadd.u64 d0, d2, d3 \n" |
michael@0 | 46 | "vmov.32 %3, d0[0] \n" |
// Operands: %0 = src_a, %1 = src_b, %2 = count (all read-write registers),
// %3 = sse (write-only register).
michael@0 | 47 | : "+r"(src_a), |
michael@0 | 48 | "+r"(src_b), |
michael@0 | 49 | "+r"(count), |
michael@0 | 50 | "=r"(sse) |
michael@0 | 51 | : |
// Clobbers: memory, the condition flags, and the NEON registers used above.
michael@0 | 52 | : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); |
michael@0 | 53 | return sse; |
michael@0 | 54 | } |
michael@0 | 55 | |
michael@0 | 56 | #endif // __ARM_NEON__ |
michael@0 | 57 | |
michael@0 | 58 | #ifdef __cplusplus |
michael@0 | 59 | } // extern "C" |
michael@0 | 60 | } // namespace libyuv |
michael@0 | 61 | #endif |