1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 1.2 +++ b/media/libyuv/source/compare_neon.cc Wed Dec 31 06:09:35 2014 +0100 1.3 @@ -0,0 +1,61 @@ 1.4 +/* 1.5 + * Copyright 2012 The LibYuv Project Authors. All rights reserved. 1.6 + * 1.7 + * Use of this source code is governed by a BSD-style license 1.8 + * that can be found in the LICENSE file in the root of the source 1.9 + * tree. An additional intellectual property rights grant can be found 1.10 + * in the file PATENTS. All contributing project authors may 1.11 + * be found in the AUTHORS file in the root of the source tree. 1.12 + */ 1.13 + 1.14 +#include "libyuv/basic_types.h" 1.15 + 1.16 +#ifdef __cplusplus 1.17 +namespace libyuv { 1.18 +extern "C" { 1.19 +#endif 1.20 + 1.21 +#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) 1.22 + 1.23 +uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) { 1.24 + volatile uint32 sse; 1.25 + asm volatile ( 1.26 + "vmov.u8 q8, #0 \n" 1.27 + "vmov.u8 q10, #0 \n" 1.28 + "vmov.u8 q9, #0 \n" 1.29 + "vmov.u8 q11, #0 \n" 1.30 + 1.31 + ".p2align 2 \n" 1.32 + "1: \n" 1.33 + "vld1.8 {q0}, [%0]! \n" 1.34 + "vld1.8 {q1}, [%1]! \n" 1.35 + "subs %2, %2, #16 \n" 1.36 + "vsubl.u8 q2, d0, d2 \n" 1.37 + "vsubl.u8 q3, d1, d3 \n" 1.38 + "vmlal.s16 q8, d4, d4 \n" 1.39 + "vmlal.s16 q9, d6, d6 \n" 1.40 + "vmlal.s16 q10, d5, d5 \n" 1.41 + "vmlal.s16 q11, d7, d7 \n" 1.42 + "bgt 1b \n" 1.43 + 1.44 + "vadd.u32 q8, q8, q9 \n" 1.45 + "vadd.u32 q10, q10, q11 \n" 1.46 + "vadd.u32 q11, q8, q10 \n" 1.47 + "vpaddl.u32 q1, q11 \n" 1.48 + "vadd.u64 d0, d2, d3 \n" 1.49 + "vmov.32 %3, d0[0] \n" 1.50 + : "+r"(src_a), 1.51 + "+r"(src_b), 1.52 + "+r"(count), 1.53 + "=r"(sse) 1.54 + : 1.55 + : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); 1.56 + return sse; 1.57 +} 1.58 + 1.59 +#endif // __ARM_NEON__ 1.60 + 1.61 +#ifdef __cplusplus 1.62 +} // extern "C" 1.63 +} // namespace libyuv 1.64 +#endif