media/libyuv/source/compare_neon.cc

Thu, 22 Jan 2015 13:21:57 +0100

author
Michael Schloh von Bennewitz <michael@schloh.com>
date
Thu, 22 Jan 2015 13:21:57 +0100
branch
TOR_BUG_9701
changeset 15
b8a032363ba2
permissions
-rw-r--r--

Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6

michael@0 1 /*
michael@0 2 * Copyright 2012 The LibYuv Project Authors. All rights reserved.
michael@0 3 *
michael@0 4 * Use of this source code is governed by a BSD-style license
michael@0 5 * that can be found in the LICENSE file in the root of the source
michael@0 6 * tree. An additional intellectual property rights grant can be found
michael@0 7 * in the file PATENTS. All contributing project authors may
michael@0 8 * be found in the AUTHORS file in the root of the source tree.
michael@0 9 */
michael@0 10
michael@0 11 #include "libyuv/basic_types.h"
michael@0 12
michael@0 13 #ifdef __cplusplus
michael@0 14 namespace libyuv {
michael@0 15 extern "C" {
michael@0 16 #endif
michael@0 17
michael@0 18 #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
michael@0 19
michael@0 20 uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {  // Sums (src_a[i] - src_b[i])^2 over the two byte buffers, 16 bytes per loop pass, using NEON.
michael@0 21 volatile uint32 sse;  // Receives the final sum from the asm block (output operand %3).
michael@0 22 asm volatile (
michael@0 23 "vmov.u8 q8, #0 \n"  // Zero the four 128-bit accumulators q8..q11.
michael@0 24 "vmov.u8 q10, #0 \n"
michael@0 25 "vmov.u8 q9, #0 \n"
michael@0 26 "vmov.u8 q11, #0 \n"
michael@0 27
michael@0 28 ".p2align 2 \n"  // Align the loop head to a 4-byte boundary.
michael@0 29 "1: \n"  // Loop head; the body runs before the count is tested, so it always executes at least once.
michael@0 30 "vld1.8 {q0}, [%0]! \n"  // Load 16 bytes from src_a and advance the pointer.
michael@0 31 "vld1.8 {q1}, [%1]! \n"  // Load 16 bytes from src_b and advance the pointer.
michael@0 32 "subs %2, %2, #16 \n"  // count -= 16 and set flags for the bgt below.
michael@0 33 "vsubl.u8 q2, d0, d2 \n"  // Widen to 16 bits: differences of bytes 0..7.
michael@0 34 "vsubl.u8 q3, d1, d3 \n"  // Widen to 16 bits: differences of bytes 8..15.
michael@0 35 "vmlal.s16 q8, d4, d4 \n"  // Square each 16-bit difference (as signed) and accumulate into 32-bit lanes;
michael@0 36 "vmlal.s16 q9, d6, d6 \n"  // the four independent accumulators each take one quarter of the 16 differences.
michael@0 37 "vmlal.s16 q10, d5, d5 \n"
michael@0 38 "vmlal.s16 q11, d7, d7 \n"
michael@0 39 "bgt 1b \n"  // Repeat while the remaining count is > 0. NOTE(review): data is always read in whole 16-byte chunks, so callers presumably pass a positive multiple of 16 - confirm.
michael@0 40
michael@0 41 "vadd.u32 q8, q8, q9 \n"  // Fold the four accumulators into one vector of four 32-bit partial sums (q11).
michael@0 42 "vadd.u32 q10, q10, q11 \n"
michael@0 43 "vadd.u32 q11, q8, q10 \n"
michael@0 44 "vpaddl.u32 q1, q11 \n"  // Pairwise-add adjacent 32-bit lanes into two 64-bit sums.
michael@0 45 "vadd.u64 d0, d2, d3 \n"  // Add the two 64-bit halves to get the total in d0.
michael@0 46 "vmov.32 %3, d0[0] \n"  // Copy 32-bit lane 0 of d0 into sse; only 32 bits of the 64-bit total are returned.
michael@0 47 : "+r"(src_a),  // %0: read/write, post-incremented by the loads.
michael@0 48 "+r"(src_b),  // %1: read/write, post-incremented by the loads.
michael@0 49 "+r"(count),  // %2: read/write, decremented by 16 per pass.
michael@0 50 "=r"(sse)  // %3: write-only result.
michael@0 51 :
michael@0 52 : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");  // Clobbers: condition flags (subs) and every NEON register named above.
michael@0 53 return sse;
michael@0 54 }
michael@0 55
michael@0 56 #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)
michael@0 57
michael@0 58 #ifdef __cplusplus
michael@0 59 } // extern "C"
michael@0 60 } // namespace libyuv
michael@0 61 #endif

mercurial