Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
michael@0 | 1 | /* |
michael@0 | 2 | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
michael@0 | 3 | * |
michael@0 | 4 | * Use of this source code is governed by a BSD-style license |
michael@0 | 5 | * that can be found in the LICENSE file in the root of the source |
michael@0 | 6 | * tree. An additional intellectual property rights grant can be found |
michael@0 | 7 | * in the file PATENTS. All contributing project authors may |
michael@0 | 8 | * be found in the AUTHORS file in the root of the source tree. |
michael@0 | 9 | */ |
michael@0 | 10 | |
michael@0 | 11 | #include <assert.h> |
michael@0 | 12 | #include <math.h> |
michael@0 | 13 | |
michael@0 | 14 | #include "./vpx_config.h" |
michael@0 | 15 | #include "./vp9_rtcd.h" |
michael@0 | 16 | #include "vp9/common/vp9_systemdependent.h" |
michael@0 | 17 | #include "vp9/common/vp9_blockd.h" |
michael@0 | 18 | #include "vp9/common/vp9_common.h" |
michael@0 | 19 | #include "vp9/common/vp9_idct.h" |
michael@0 | 20 | |
void vp9_iwht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
  /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
     0.5 shifts per pixel. */
  int i;
  int16_t output[16];
  int a1, b1, c1, d1, e1;
  const int16_t *ip = input;
  int16_t *op = output;

  // Pass 1: inverse-transform each of the 4 rows into the intermediate
  // buffer. The UNIT_QUANT_SHIFT right-shift undoes the scaling applied
  // on the forward side.
  for (i = 0; i < 4; i++) {
    a1 = ip[0] >> UNIT_QUANT_SHIFT;
    c1 = ip[1] >> UNIT_QUANT_SHIFT;
    d1 = ip[2] >> UNIT_QUANT_SHIFT;
    b1 = ip[3] >> UNIT_QUANT_SHIFT;
    a1 += c1;
    d1 -= b1;
    e1 = (a1 - d1) >> 1;  // shared intermediate for the reversible lifting
    b1 = e1 - b1;
    c1 = e1 - c1;
    a1 -= b1;
    d1 += c1;
    op[0] = a1;
    op[1] = b1;
    op[2] = c1;
    op[3] = d1;
    ip += 4;
    op += 4;
  }

  // Pass 2: inverse-transform each column (stride-4 reads from the
  // intermediate buffer) and add the result to the destination pixels,
  // clamping to the valid 8-bit range.
  ip = output;
  for (i = 0; i < 4; i++) {
    a1 = ip[4 * 0];
    c1 = ip[4 * 1];
    d1 = ip[4 * 2];
    b1 = ip[4 * 3];
    a1 += c1;
    d1 -= b1;
    e1 = (a1 - d1) >> 1;
    b1 = e1 - b1;
    c1 = e1 - c1;
    a1 -= b1;
    d1 += c1;
    dest[stride * 0] = clip_pixel(dest[stride * 0] + a1);
    dest[stride * 1] = clip_pixel(dest[stride * 1] + b1);
    dest[stride * 2] = clip_pixel(dest[stride * 2] + c1);
    dest[stride * 3] = clip_pixel(dest[stride * 3] + d1);

    ip++;
    dest++;
  }
}
michael@0 | 72 | |
michael@0 | 73 | void vp9_iwht4x4_1_add_c(const int16_t *in, uint8_t *dest, int dest_stride) { |
michael@0 | 74 | int i; |
michael@0 | 75 | int a1, e1; |
michael@0 | 76 | int16_t tmp[4]; |
michael@0 | 77 | const int16_t *ip = in; |
michael@0 | 78 | int16_t *op = tmp; |
michael@0 | 79 | |
michael@0 | 80 | a1 = ip[0] >> UNIT_QUANT_SHIFT; |
michael@0 | 81 | e1 = a1 >> 1; |
michael@0 | 82 | a1 -= e1; |
michael@0 | 83 | op[0] = a1; |
michael@0 | 84 | op[1] = op[2] = op[3] = e1; |
michael@0 | 85 | |
michael@0 | 86 | ip = tmp; |
michael@0 | 87 | for (i = 0; i < 4; i++) { |
michael@0 | 88 | e1 = ip[0] >> 1; |
michael@0 | 89 | a1 = ip[0] - e1; |
michael@0 | 90 | dest[dest_stride * 0] = clip_pixel(dest[dest_stride * 0] + a1); |
michael@0 | 91 | dest[dest_stride * 1] = clip_pixel(dest[dest_stride * 1] + e1); |
michael@0 | 92 | dest[dest_stride * 2] = clip_pixel(dest[dest_stride * 2] + e1); |
michael@0 | 93 | dest[dest_stride * 3] = clip_pixel(dest[dest_stride * 3] + e1); |
michael@0 | 94 | ip++; |
michael@0 | 95 | dest++; |
michael@0 | 96 | } |
michael@0 | 97 | } |
michael@0 | 98 | |
michael@0 | 99 | static void idct4_1d(const int16_t *input, int16_t *output) { |
michael@0 | 100 | int16_t step[4]; |
michael@0 | 101 | int temp1, temp2; |
michael@0 | 102 | // stage 1 |
michael@0 | 103 | temp1 = (input[0] + input[2]) * cospi_16_64; |
michael@0 | 104 | temp2 = (input[0] - input[2]) * cospi_16_64; |
michael@0 | 105 | step[0] = dct_const_round_shift(temp1); |
michael@0 | 106 | step[1] = dct_const_round_shift(temp2); |
michael@0 | 107 | temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; |
michael@0 | 108 | temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; |
michael@0 | 109 | step[2] = dct_const_round_shift(temp1); |
michael@0 | 110 | step[3] = dct_const_round_shift(temp2); |
michael@0 | 111 | |
michael@0 | 112 | // stage 2 |
michael@0 | 113 | output[0] = step[0] + step[3]; |
michael@0 | 114 | output[1] = step[1] + step[2]; |
michael@0 | 115 | output[2] = step[1] - step[2]; |
michael@0 | 116 | output[3] = step[0] - step[3]; |
michael@0 | 117 | } |
michael@0 | 118 | |
void vp9_idct4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride) {
  // Full 4x4 inverse DCT: 1-D transform over the rows, then over the
  // columns, with the column result rounded (by 4 bits), clamped and
  // added to the destination pixels.
  int16_t intermediate[4 * 4];
  int16_t col_in[4], col_out[4];
  int r, c;

  // Row pass into the intermediate buffer.
  for (r = 0; r < 4; ++r)
    idct4_1d(input + 4 * r, intermediate + 4 * r);

  // Column pass plus reconstruction.
  for (c = 0; c < 4; ++c) {
    for (r = 0; r < 4; ++r)
      col_in[r] = intermediate[r * 4 + c];
    idct4_1d(col_in, col_out);
    for (r = 0; r < 4; ++r) {
      const int px = ROUND_POWER_OF_TWO(col_out[r], 4) + dest[r * stride + c];
      dest[r * stride + c] = clip_pixel(px);
    }
  }
}
michael@0 | 142 | |
michael@0 | 143 | void vp9_idct4x4_1_add_c(const int16_t *input, uint8_t *dest, int dest_stride) { |
michael@0 | 144 | int i; |
michael@0 | 145 | int a1; |
michael@0 | 146 | int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
michael@0 | 147 | out = dct_const_round_shift(out * cospi_16_64); |
michael@0 | 148 | a1 = ROUND_POWER_OF_TWO(out, 4); |
michael@0 | 149 | |
michael@0 | 150 | for (i = 0; i < 4; i++) { |
michael@0 | 151 | dest[0] = clip_pixel(dest[0] + a1); |
michael@0 | 152 | dest[1] = clip_pixel(dest[1] + a1); |
michael@0 | 153 | dest[2] = clip_pixel(dest[2] + a1); |
michael@0 | 154 | dest[3] = clip_pixel(dest[3] + a1); |
michael@0 | 155 | dest += dest_stride; |
michael@0 | 156 | } |
michael@0 | 157 | } |
michael@0 | 158 | |
static void idct8_1d(const int16_t *input, int16_t *output) {
  // One 1-D pass of the 8-point inverse DCT. The even half (inputs
  // 0, 2, 4, 6) is delegated to idct4_1d; the odd half goes through
  // the butterfly stages below.
  int16_t step1[8], step2[8];
  int temp1, temp2;
  // stage 1: even inputs are loaded in the order idct4_1d expects;
  // odd inputs (1, 7) and (5, 3) are rotated pairwise.
  step1[0] = input[0];
  step1[2] = input[4];
  step1[1] = input[2];
  step1[3] = input[6];
  temp1 = input[1] * cospi_28_64 - input[7] * cospi_4_64;
  temp2 = input[1] * cospi_4_64 + input[7] * cospi_28_64;
  step1[4] = dct_const_round_shift(temp1);
  step1[7] = dct_const_round_shift(temp2);
  temp1 = input[5] * cospi_12_64 - input[3] * cospi_20_64;
  temp2 = input[5] * cospi_20_64 + input[3] * cospi_12_64;
  step1[5] = dct_const_round_shift(temp1);
  step1[6] = dct_const_round_shift(temp2);

  // stage 2 & stage 3 - even half: 4-point inverse DCT in place on
  // step1[0..3].
  idct4_1d(step1, step1);

  // stage 2 - odd half
  step2[4] = step1[4] + step1[5];
  step2[5] = step1[4] - step1[5];
  step2[6] = -step1[6] + step1[7];
  step2[7] = step1[6] + step1[7];

  // stage 3 - odd half: rotate the middle pair by cospi_16_64.
  step1[4] = step2[4];
  temp1 = (step2[6] - step2[5]) * cospi_16_64;
  temp2 = (step2[5] + step2[6]) * cospi_16_64;
  step1[5] = dct_const_round_shift(temp1);
  step1[6] = dct_const_round_shift(temp2);
  step1[7] = step2[7];

  // stage 4: final butterfly combining the even and odd halves.
  output[0] = step1[0] + step1[7];
  output[1] = step1[1] + step1[6];
  output[2] = step1[2] + step1[5];
  output[3] = step1[3] + step1[4];
  output[4] = step1[3] - step1[4];
  output[5] = step1[2] - step1[5];
  output[6] = step1[1] - step1[6];
  output[7] = step1[0] - step1[7];
}
michael@0 | 203 | |
void vp9_idct8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride) {
  // Full 8x8 inverse DCT (all 64 coefficients): row pass, column pass,
  // then round (by 5 bits), clamp and accumulate into dest.
  int16_t buffer[8 * 8];
  int16_t col_in[8], col_out[8];
  int r, c;

  // Row pass into the intermediate buffer.
  for (r = 0; r < 8; ++r)
    idct8_1d(input + 8 * r, buffer + 8 * r);

  // Column pass plus reconstruction.
  for (c = 0; c < 8; ++c) {
    for (r = 0; r < 8; ++r)
      col_in[r] = buffer[r * 8 + c];
    idct8_1d(col_in, col_out);
    for (r = 0; r < 8; ++r) {
      const int px = ROUND_POWER_OF_TWO(col_out[r], 5) + dest[r * stride + c];
      dest[r * stride + c] = clip_pixel(px);
    }
  }
}
michael@0 | 227 | |
michael@0 | 228 | void vp9_idct8x8_1_add_c(const int16_t *input, uint8_t *dest, int stride) { |
michael@0 | 229 | int i, j; |
michael@0 | 230 | int a1; |
michael@0 | 231 | int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
michael@0 | 232 | out = dct_const_round_shift(out * cospi_16_64); |
michael@0 | 233 | a1 = ROUND_POWER_OF_TWO(out, 5); |
michael@0 | 234 | for (j = 0; j < 8; ++j) { |
michael@0 | 235 | for (i = 0; i < 8; ++i) |
michael@0 | 236 | dest[i] = clip_pixel(dest[i] + a1); |
michael@0 | 237 | dest += stride; |
michael@0 | 238 | } |
michael@0 | 239 | } |
michael@0 | 240 | |
static void iadst4_1d(const int16_t *input, int16_t *output) {
  // One 1-D pass of the 4-point inverse ADST, built from the sinpi_x_9
  // constant set.
  int s0, s1, s2, s3, s4, s5, s6, s7;

  int x0 = input[0];
  int x1 = input[1];
  int x2 = input[2];
  int x3 = input[3];

  // All-zero input short-circuits to an all-zero output.
  if (!(x0 | x1 | x2 | x3)) {
    output[0] = output[1] = output[2] = output[3] = 0;
    return;
  }

  s0 = sinpi_1_9 * x0;
  s1 = sinpi_2_9 * x0;
  s2 = sinpi_3_9 * x1;
  s3 = sinpi_4_9 * x2;
  s4 = sinpi_1_9 * x2;
  s5 = sinpi_2_9 * x3;
  s6 = sinpi_4_9 * x3;
  s7 = x0 - x2 + x3;

  x0 = s0 + s3 + s5;
  x1 = s1 - s4 - s6;
  x2 = sinpi_3_9 * s7;
  x3 = s2;

  s0 = x0 + x3;
  s1 = x1 + x3;
  s2 = x2;
  s3 = x0 + x1 - x3;

  // 1-D transform scaling factor is sqrt(2).
  // The overall dynamic range is 14b (input) + 14b (multiplication scaling)
  // + 1b (addition) = 29b.
  // Hence the output bit depth is 15b.
  output[0] = dct_const_round_shift(s0);
  output[1] = dct_const_round_shift(s1);
  output[2] = dct_const_round_shift(s2);
  output[3] = dct_const_round_shift(s3);
}
michael@0 | 282 | |
michael@0 | 283 | void vp9_iht4x4_16_add_c(const int16_t *input, uint8_t *dest, int stride, |
michael@0 | 284 | int tx_type) { |
michael@0 | 285 | const transform_2d IHT_4[] = { |
michael@0 | 286 | { idct4_1d, idct4_1d }, // DCT_DCT = 0 |
michael@0 | 287 | { iadst4_1d, idct4_1d }, // ADST_DCT = 1 |
michael@0 | 288 | { idct4_1d, iadst4_1d }, // DCT_ADST = 2 |
michael@0 | 289 | { iadst4_1d, iadst4_1d } // ADST_ADST = 3 |
michael@0 | 290 | }; |
michael@0 | 291 | |
michael@0 | 292 | int i, j; |
michael@0 | 293 | int16_t out[4 * 4]; |
michael@0 | 294 | int16_t *outptr = out; |
michael@0 | 295 | int16_t temp_in[4], temp_out[4]; |
michael@0 | 296 | |
michael@0 | 297 | // inverse transform row vectors |
michael@0 | 298 | for (i = 0; i < 4; ++i) { |
michael@0 | 299 | IHT_4[tx_type].rows(input, outptr); |
michael@0 | 300 | input += 4; |
michael@0 | 301 | outptr += 4; |
michael@0 | 302 | } |
michael@0 | 303 | |
michael@0 | 304 | // inverse transform column vectors |
michael@0 | 305 | for (i = 0; i < 4; ++i) { |
michael@0 | 306 | for (j = 0; j < 4; ++j) |
michael@0 | 307 | temp_in[j] = out[j * 4 + i]; |
michael@0 | 308 | IHT_4[tx_type].cols(temp_in, temp_out); |
michael@0 | 309 | for (j = 0; j < 4; ++j) |
michael@0 | 310 | dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) |
michael@0 | 311 | + dest[j * stride + i]); |
michael@0 | 312 | } |
michael@0 | 313 | } |
static void iadst8_1d(const int16_t *input, int16_t *output) {
  // One 1-D pass of the 8-point inverse ADST. Note the permuted input
  // order below (7, 0, 5, 2, 3, 4, 1, 6) and the sign flips on the
  // odd outputs at the end — both are part of the transform definition.
  int s0, s1, s2, s3, s4, s5, s6, s7;

  int x0 = input[7];
  int x1 = input[0];
  int x2 = input[5];
  int x3 = input[2];
  int x4 = input[3];
  int x5 = input[4];
  int x6 = input[1];
  int x7 = input[6];

  // All-zero input short-circuits to an all-zero output.
  if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) {
    output[0] = output[1] = output[2] = output[3] = output[4]
              = output[5] = output[6] = output[7] = 0;
    return;
  }

  // stage 1: pairwise rotations by the odd cospi constants, then a
  // butterfly with rounding back to 16 bits.
  s0 = cospi_2_64 * x0 + cospi_30_64 * x1;
  s1 = cospi_30_64 * x0 - cospi_2_64 * x1;
  s2 = cospi_10_64 * x2 + cospi_22_64 * x3;
  s3 = cospi_22_64 * x2 - cospi_10_64 * x3;
  s4 = cospi_18_64 * x4 + cospi_14_64 * x5;
  s5 = cospi_14_64 * x4 - cospi_18_64 * x5;
  s6 = cospi_26_64 * x6 + cospi_6_64 * x7;
  s7 = cospi_6_64 * x6 - cospi_26_64 * x7;

  x0 = dct_const_round_shift(s0 + s4);
  x1 = dct_const_round_shift(s1 + s5);
  x2 = dct_const_round_shift(s2 + s6);
  x3 = dct_const_round_shift(s3 + s7);
  x4 = dct_const_round_shift(s0 - s4);
  x5 = dct_const_round_shift(s1 - s5);
  x6 = dct_const_round_shift(s2 - s6);
  x7 = dct_const_round_shift(s3 - s7);

  // stage 2: first half passes through; second half is rotated by
  // cospi_8_64/cospi_24_64 and combined.
  s0 = x0;
  s1 = x1;
  s2 = x2;
  s3 = x3;
  s4 = cospi_8_64 * x4 + cospi_24_64 * x5;
  s5 = cospi_24_64 * x4 - cospi_8_64 * x5;
  s6 = -cospi_24_64 * x6 + cospi_8_64 * x7;
  s7 = cospi_8_64 * x6 + cospi_24_64 * x7;

  x0 = s0 + s2;
  x1 = s1 + s3;
  x2 = s0 - s2;
  x3 = s1 - s3;
  x4 = dct_const_round_shift(s4 + s6);
  x5 = dct_const_round_shift(s5 + s7);
  x6 = dct_const_round_shift(s4 - s6);
  x7 = dct_const_round_shift(s5 - s7);

  // stage 3: final rotations by cospi_16_64.
  s2 = cospi_16_64 * (x2 + x3);
  s3 = cospi_16_64 * (x2 - x3);
  s6 = cospi_16_64 * (x6 + x7);
  s7 = cospi_16_64 * (x6 - x7);

  x2 = dct_const_round_shift(s2);
  x3 = dct_const_round_shift(s3);
  x6 = dct_const_round_shift(s6);
  x7 = dct_const_round_shift(s7);

  // Permute and sign-flip into the output order.
  output[0] = x0;
  output[1] = -x4;
  output[2] = x6;
  output[3] = -x2;
  output[4] = x3;
  output[5] = -x7;
  output[6] = x5;
  output[7] = -x1;
}
michael@0 | 390 | |
// Row/column 1-D transform pairs for the 8x8 inverse hybrid transform,
// indexed by tx_type.
static const transform_2d IHT_8[] = {
  { idct8_1d, idct8_1d },    // DCT_DCT = 0
  { iadst8_1d, idct8_1d },   // ADST_DCT = 1
  { idct8_1d, iadst8_1d },   // DCT_ADST = 2
  { iadst8_1d, iadst8_1d }   // ADST_ADST = 3
};
michael@0 | 397 | |
michael@0 | 398 | void vp9_iht8x8_64_add_c(const int16_t *input, uint8_t *dest, int stride, |
michael@0 | 399 | int tx_type) { |
michael@0 | 400 | int i, j; |
michael@0 | 401 | int16_t out[8 * 8]; |
michael@0 | 402 | int16_t *outptr = out; |
michael@0 | 403 | int16_t temp_in[8], temp_out[8]; |
michael@0 | 404 | const transform_2d ht = IHT_8[tx_type]; |
michael@0 | 405 | |
michael@0 | 406 | // inverse transform row vectors |
michael@0 | 407 | for (i = 0; i < 8; ++i) { |
michael@0 | 408 | ht.rows(input, outptr); |
michael@0 | 409 | input += 8; |
michael@0 | 410 | outptr += 8; |
michael@0 | 411 | } |
michael@0 | 412 | |
michael@0 | 413 | // inverse transform column vectors |
michael@0 | 414 | for (i = 0; i < 8; ++i) { |
michael@0 | 415 | for (j = 0; j < 8; ++j) |
michael@0 | 416 | temp_in[j] = out[j * 8 + i]; |
michael@0 | 417 | ht.cols(temp_in, temp_out); |
michael@0 | 418 | for (j = 0; j < 8; ++j) |
michael@0 | 419 | dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) |
michael@0 | 420 | + dest[j * stride + i]); |
michael@0 | 421 | } |
michael@0 | 422 | } |
michael@0 | 423 | |
void vp9_idct8x8_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
  // 8x8 inverse DCT for sparse blocks (per the name, at most the first
  // 10 coefficients are non-zero): only the first 4 rows can carry
  // data, so the row pass stops early and the rest of the buffer
  // stays zero-initialized.
  int16_t buffer[8 * 8] = { 0 };
  int16_t col_in[8], col_out[8];
  int r, c;

  // Row pass over the 4 potentially non-zero rows only.
  for (r = 0; r < 4; ++r)
    idct8_1d(input + 8 * r, buffer + 8 * r);

  // Full column pass with rounding, clamping and accumulation.
  for (c = 0; c < 8; ++c) {
    for (r = 0; r < 8; ++r)
      col_in[r] = buffer[r * 8 + c];
    idct8_1d(col_in, col_out);
    for (r = 0; r < 8; ++r) {
      const int px = ROUND_POWER_OF_TWO(col_out[r], 5) + dest[r * stride + c];
      dest[r * stride + c] = clip_pixel(px);
    }
  }
}
michael@0 | 448 | |
static void idct16_1d(const int16_t *input, int16_t *output) {
  // One 1-D pass of the 16-point inverse DCT: a 7-stage butterfly
  // network. The exact statement order matters (step1/step2 are reused
  // across stages), so treat this as order-sensitive code.
  int16_t step1[16], step2[16];
  int temp1, temp2;

  // stage 1: load inputs. The N/2 index notation is unusual; the
  // effective order interleaves even and odd coefficients (presumably
  // mirroring a wider variant's indexing — confirm against upstream).
  step1[0] = input[0/2];
  step1[1] = input[16/2];
  step1[2] = input[8/2];
  step1[3] = input[24/2];
  step1[4] = input[4/2];
  step1[5] = input[20/2];
  step1[6] = input[12/2];
  step1[7] = input[28/2];
  step1[8] = input[2/2];
  step1[9] = input[18/2];
  step1[10] = input[10/2];
  step1[11] = input[26/2];
  step1[12] = input[6/2];
  step1[13] = input[22/2];
  step1[14] = input[14/2];
  step1[15] = input[30/2];

  // stage 2: lower half passes through; upper half is rotated pairwise.
  step2[0] = step1[0];
  step2[1] = step1[1];
  step2[2] = step1[2];
  step2[3] = step1[3];
  step2[4] = step1[4];
  step2[5] = step1[5];
  step2[6] = step1[6];
  step2[7] = step1[7];

  temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
  temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
  step2[8] = dct_const_round_shift(temp1);
  step2[15] = dct_const_round_shift(temp2);

  temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
  temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
  step2[9] = dct_const_round_shift(temp1);
  step2[14] = dct_const_round_shift(temp2);

  temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
  temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
  step2[10] = dct_const_round_shift(temp1);
  step2[13] = dct_const_round_shift(temp2);

  temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
  temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
  step2[11] = dct_const_round_shift(temp1);
  step2[12] = dct_const_round_shift(temp2);

  // stage 3: rotate indices 4..7; butterfly the 8..15 half.
  step1[0] = step2[0];
  step1[1] = step2[1];
  step1[2] = step2[2];
  step1[3] = step2[3];

  temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
  temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
  step1[4] = dct_const_round_shift(temp1);
  step1[7] = dct_const_round_shift(temp2);
  temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
  temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
  step1[5] = dct_const_round_shift(temp1);
  step1[6] = dct_const_round_shift(temp2);

  step1[8] = step2[8] + step2[9];
  step1[9] = step2[8] - step2[9];
  step1[10] = -step2[10] + step2[11];
  step1[11] = step2[10] + step2[11];
  step1[12] = step2[12] + step2[13];
  step1[13] = step2[12] - step2[13];
  step1[14] = -step2[14] + step2[15];
  step1[15] = step2[14] + step2[15];

  // stage 4: rotate the even quarter (0..3) and butterfly 4..7;
  // rotate selected pairs in the 8..15 half.
  temp1 = (step1[0] + step1[1]) * cospi_16_64;
  temp2 = (step1[0] - step1[1]) * cospi_16_64;
  step2[0] = dct_const_round_shift(temp1);
  step2[1] = dct_const_round_shift(temp2);
  temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
  temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
  step2[2] = dct_const_round_shift(temp1);
  step2[3] = dct_const_round_shift(temp2);
  step2[4] = step1[4] + step1[5];
  step2[5] = step1[4] - step1[5];
  step2[6] = -step1[6] + step1[7];
  step2[7] = step1[6] + step1[7];

  step2[8] = step1[8];
  step2[15] = step1[15];
  temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
  temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
  step2[9] = dct_const_round_shift(temp1);
  step2[14] = dct_const_round_shift(temp2);
  temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
  temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
  step2[10] = dct_const_round_shift(temp1);
  step2[13] = dct_const_round_shift(temp2);
  step2[11] = step1[11];
  step2[12] = step1[12];

  // stage 5: butterflies plus the cospi_16_64 rotation of (5, 6).
  step1[0] = step2[0] + step2[3];
  step1[1] = step2[1] + step2[2];
  step1[2] = step2[1] - step2[2];
  step1[3] = step2[0] - step2[3];
  step1[4] = step2[4];
  temp1 = (step2[6] - step2[5]) * cospi_16_64;
  temp2 = (step2[5] + step2[6]) * cospi_16_64;
  step1[5] = dct_const_round_shift(temp1);
  step1[6] = dct_const_round_shift(temp2);
  step1[7] = step2[7];

  step1[8] = step2[8] + step2[11];
  step1[9] = step2[9] + step2[10];
  step1[10] = step2[9] - step2[10];
  step1[11] = step2[8] - step2[11];
  step1[12] = -step2[12] + step2[15];
  step1[13] = -step2[13] + step2[14];
  step1[14] = step2[13] + step2[14];
  step1[15] = step2[12] + step2[15];

  // stage 6: combine the low 8; rotate (10, 13) and (11, 12).
  step2[0] = step1[0] + step1[7];
  step2[1] = step1[1] + step1[6];
  step2[2] = step1[2] + step1[5];
  step2[3] = step1[3] + step1[4];
  step2[4] = step1[3] - step1[4];
  step2[5] = step1[2] - step1[5];
  step2[6] = step1[1] - step1[6];
  step2[7] = step1[0] - step1[7];
  step2[8] = step1[8];
  step2[9] = step1[9];
  temp1 = (-step1[10] + step1[13]) * cospi_16_64;
  temp2 = (step1[10] + step1[13]) * cospi_16_64;
  step2[10] = dct_const_round_shift(temp1);
  step2[13] = dct_const_round_shift(temp2);
  temp1 = (-step1[11] + step1[12]) * cospi_16_64;
  temp2 = (step1[11] + step1[12]) * cospi_16_64;
  step2[11] = dct_const_round_shift(temp1);
  step2[12] = dct_const_round_shift(temp2);
  step2[14] = step1[14];
  step2[15] = step1[15];

  // stage 7: final mirror-image butterfly into the output.
  output[0] = step2[0] + step2[15];
  output[1] = step2[1] + step2[14];
  output[2] = step2[2] + step2[13];
  output[3] = step2[3] + step2[12];
  output[4] = step2[4] + step2[11];
  output[5] = step2[5] + step2[10];
  output[6] = step2[6] + step2[9];
  output[7] = step2[7] + step2[8];
  output[8] = step2[7] - step2[8];
  output[9] = step2[6] - step2[9];
  output[10] = step2[5] - step2[10];
  output[11] = step2[4] - step2[11];
  output[12] = step2[3] - step2[12];
  output[13] = step2[2] - step2[13];
  output[14] = step2[1] - step2[14];
  output[15] = step2[0] - step2[15];
}
michael@0 | 613 | |
void vp9_idct16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride) {
  // Full 16x16 inverse DCT (all 256 coefficients): row pass, column
  // pass, then round (by 6 bits), clamp and accumulate into dest.
  int16_t buffer[16 * 16];
  int16_t col_in[16], col_out[16];
  int r, c;

  // Row pass into the intermediate buffer.
  for (r = 0; r < 16; ++r)
    idct16_1d(input + 16 * r, buffer + 16 * r);

  // Column pass plus reconstruction.
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r)
      col_in[r] = buffer[r * 16 + c];
    idct16_1d(col_in, col_out);
    for (r = 0; r < 16; ++r) {
      const int px = ROUND_POWER_OF_TWO(col_out[r], 6) + dest[r * stride + c];
      dest[r * stride + c] = clip_pixel(px);
    }
  }
}
michael@0 | 637 | |
michael@0 | 638 | static void iadst16_1d(const int16_t *input, int16_t *output) { |
michael@0 | 639 | int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; |
michael@0 | 640 | |
michael@0 | 641 | int x0 = input[15]; |
michael@0 | 642 | int x1 = input[0]; |
michael@0 | 643 | int x2 = input[13]; |
michael@0 | 644 | int x3 = input[2]; |
michael@0 | 645 | int x4 = input[11]; |
michael@0 | 646 | int x5 = input[4]; |
michael@0 | 647 | int x6 = input[9]; |
michael@0 | 648 | int x7 = input[6]; |
michael@0 | 649 | int x8 = input[7]; |
michael@0 | 650 | int x9 = input[8]; |
michael@0 | 651 | int x10 = input[5]; |
michael@0 | 652 | int x11 = input[10]; |
michael@0 | 653 | int x12 = input[3]; |
michael@0 | 654 | int x13 = input[12]; |
michael@0 | 655 | int x14 = input[1]; |
michael@0 | 656 | int x15 = input[14]; |
michael@0 | 657 | |
michael@0 | 658 | if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 |
michael@0 | 659 | | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { |
michael@0 | 660 | output[0] = output[1] = output[2] = output[3] = output[4] |
michael@0 | 661 | = output[5] = output[6] = output[7] = output[8] |
michael@0 | 662 | = output[9] = output[10] = output[11] = output[12] |
michael@0 | 663 | = output[13] = output[14] = output[15] = 0; |
michael@0 | 664 | return; |
michael@0 | 665 | } |
michael@0 | 666 | |
michael@0 | 667 | // stage 1 |
michael@0 | 668 | s0 = x0 * cospi_1_64 + x1 * cospi_31_64; |
michael@0 | 669 | s1 = x0 * cospi_31_64 - x1 * cospi_1_64; |
michael@0 | 670 | s2 = x2 * cospi_5_64 + x3 * cospi_27_64; |
michael@0 | 671 | s3 = x2 * cospi_27_64 - x3 * cospi_5_64; |
michael@0 | 672 | s4 = x4 * cospi_9_64 + x5 * cospi_23_64; |
michael@0 | 673 | s5 = x4 * cospi_23_64 - x5 * cospi_9_64; |
michael@0 | 674 | s6 = x6 * cospi_13_64 + x7 * cospi_19_64; |
michael@0 | 675 | s7 = x6 * cospi_19_64 - x7 * cospi_13_64; |
michael@0 | 676 | s8 = x8 * cospi_17_64 + x9 * cospi_15_64; |
michael@0 | 677 | s9 = x8 * cospi_15_64 - x9 * cospi_17_64; |
michael@0 | 678 | s10 = x10 * cospi_21_64 + x11 * cospi_11_64; |
michael@0 | 679 | s11 = x10 * cospi_11_64 - x11 * cospi_21_64; |
michael@0 | 680 | s12 = x12 * cospi_25_64 + x13 * cospi_7_64; |
michael@0 | 681 | s13 = x12 * cospi_7_64 - x13 * cospi_25_64; |
michael@0 | 682 | s14 = x14 * cospi_29_64 + x15 * cospi_3_64; |
michael@0 | 683 | s15 = x14 * cospi_3_64 - x15 * cospi_29_64; |
michael@0 | 684 | |
michael@0 | 685 | x0 = dct_const_round_shift(s0 + s8); |
michael@0 | 686 | x1 = dct_const_round_shift(s1 + s9); |
michael@0 | 687 | x2 = dct_const_round_shift(s2 + s10); |
michael@0 | 688 | x3 = dct_const_round_shift(s3 + s11); |
michael@0 | 689 | x4 = dct_const_round_shift(s4 + s12); |
michael@0 | 690 | x5 = dct_const_round_shift(s5 + s13); |
michael@0 | 691 | x6 = dct_const_round_shift(s6 + s14); |
michael@0 | 692 | x7 = dct_const_round_shift(s7 + s15); |
michael@0 | 693 | x8 = dct_const_round_shift(s0 - s8); |
michael@0 | 694 | x9 = dct_const_round_shift(s1 - s9); |
michael@0 | 695 | x10 = dct_const_round_shift(s2 - s10); |
michael@0 | 696 | x11 = dct_const_round_shift(s3 - s11); |
michael@0 | 697 | x12 = dct_const_round_shift(s4 - s12); |
michael@0 | 698 | x13 = dct_const_round_shift(s5 - s13); |
michael@0 | 699 | x14 = dct_const_round_shift(s6 - s14); |
michael@0 | 700 | x15 = dct_const_round_shift(s7 - s15); |
michael@0 | 701 | |
michael@0 | 702 | // stage 2 |
michael@0 | 703 | s0 = x0; |
michael@0 | 704 | s1 = x1; |
michael@0 | 705 | s2 = x2; |
michael@0 | 706 | s3 = x3; |
michael@0 | 707 | s4 = x4; |
michael@0 | 708 | s5 = x5; |
michael@0 | 709 | s6 = x6; |
michael@0 | 710 | s7 = x7; |
michael@0 | 711 | s8 = x8 * cospi_4_64 + x9 * cospi_28_64; |
michael@0 | 712 | s9 = x8 * cospi_28_64 - x9 * cospi_4_64; |
michael@0 | 713 | s10 = x10 * cospi_20_64 + x11 * cospi_12_64; |
michael@0 | 714 | s11 = x10 * cospi_12_64 - x11 * cospi_20_64; |
michael@0 | 715 | s12 = - x12 * cospi_28_64 + x13 * cospi_4_64; |
michael@0 | 716 | s13 = x12 * cospi_4_64 + x13 * cospi_28_64; |
michael@0 | 717 | s14 = - x14 * cospi_12_64 + x15 * cospi_20_64; |
michael@0 | 718 | s15 = x14 * cospi_20_64 + x15 * cospi_12_64; |
michael@0 | 719 | |
michael@0 | 720 | x0 = s0 + s4; |
michael@0 | 721 | x1 = s1 + s5; |
michael@0 | 722 | x2 = s2 + s6; |
michael@0 | 723 | x3 = s3 + s7; |
michael@0 | 724 | x4 = s0 - s4; |
michael@0 | 725 | x5 = s1 - s5; |
michael@0 | 726 | x6 = s2 - s6; |
michael@0 | 727 | x7 = s3 - s7; |
michael@0 | 728 | x8 = dct_const_round_shift(s8 + s12); |
michael@0 | 729 | x9 = dct_const_round_shift(s9 + s13); |
michael@0 | 730 | x10 = dct_const_round_shift(s10 + s14); |
michael@0 | 731 | x11 = dct_const_round_shift(s11 + s15); |
michael@0 | 732 | x12 = dct_const_round_shift(s8 - s12); |
michael@0 | 733 | x13 = dct_const_round_shift(s9 - s13); |
michael@0 | 734 | x14 = dct_const_round_shift(s10 - s14); |
michael@0 | 735 | x15 = dct_const_round_shift(s11 - s15); |
michael@0 | 736 | |
michael@0 | 737 | // stage 3 |
michael@0 | 738 | s0 = x0; |
michael@0 | 739 | s1 = x1; |
michael@0 | 740 | s2 = x2; |
michael@0 | 741 | s3 = x3; |
michael@0 | 742 | s4 = x4 * cospi_8_64 + x5 * cospi_24_64; |
michael@0 | 743 | s5 = x4 * cospi_24_64 - x5 * cospi_8_64; |
michael@0 | 744 | s6 = - x6 * cospi_24_64 + x7 * cospi_8_64; |
michael@0 | 745 | s7 = x6 * cospi_8_64 + x7 * cospi_24_64; |
michael@0 | 746 | s8 = x8; |
michael@0 | 747 | s9 = x9; |
michael@0 | 748 | s10 = x10; |
michael@0 | 749 | s11 = x11; |
michael@0 | 750 | s12 = x12 * cospi_8_64 + x13 * cospi_24_64; |
michael@0 | 751 | s13 = x12 * cospi_24_64 - x13 * cospi_8_64; |
michael@0 | 752 | s14 = - x14 * cospi_24_64 + x15 * cospi_8_64; |
michael@0 | 753 | s15 = x14 * cospi_8_64 + x15 * cospi_24_64; |
michael@0 | 754 | |
michael@0 | 755 | x0 = s0 + s2; |
michael@0 | 756 | x1 = s1 + s3; |
michael@0 | 757 | x2 = s0 - s2; |
michael@0 | 758 | x3 = s1 - s3; |
michael@0 | 759 | x4 = dct_const_round_shift(s4 + s6); |
michael@0 | 760 | x5 = dct_const_round_shift(s5 + s7); |
michael@0 | 761 | x6 = dct_const_round_shift(s4 - s6); |
michael@0 | 762 | x7 = dct_const_round_shift(s5 - s7); |
michael@0 | 763 | x8 = s8 + s10; |
michael@0 | 764 | x9 = s9 + s11; |
michael@0 | 765 | x10 = s8 - s10; |
michael@0 | 766 | x11 = s9 - s11; |
michael@0 | 767 | x12 = dct_const_round_shift(s12 + s14); |
michael@0 | 768 | x13 = dct_const_round_shift(s13 + s15); |
michael@0 | 769 | x14 = dct_const_round_shift(s12 - s14); |
michael@0 | 770 | x15 = dct_const_round_shift(s13 - s15); |
michael@0 | 771 | |
michael@0 | 772 | // stage 4 |
michael@0 | 773 | s2 = (- cospi_16_64) * (x2 + x3); |
michael@0 | 774 | s3 = cospi_16_64 * (x2 - x3); |
michael@0 | 775 | s6 = cospi_16_64 * (x6 + x7); |
michael@0 | 776 | s7 = cospi_16_64 * (- x6 + x7); |
michael@0 | 777 | s10 = cospi_16_64 * (x10 + x11); |
michael@0 | 778 | s11 = cospi_16_64 * (- x10 + x11); |
michael@0 | 779 | s14 = (- cospi_16_64) * (x14 + x15); |
michael@0 | 780 | s15 = cospi_16_64 * (x14 - x15); |
michael@0 | 781 | |
michael@0 | 782 | x2 = dct_const_round_shift(s2); |
michael@0 | 783 | x3 = dct_const_round_shift(s3); |
michael@0 | 784 | x6 = dct_const_round_shift(s6); |
michael@0 | 785 | x7 = dct_const_round_shift(s7); |
michael@0 | 786 | x10 = dct_const_round_shift(s10); |
michael@0 | 787 | x11 = dct_const_round_shift(s11); |
michael@0 | 788 | x14 = dct_const_round_shift(s14); |
michael@0 | 789 | x15 = dct_const_round_shift(s15); |
michael@0 | 790 | |
michael@0 | 791 | output[0] = x0; |
michael@0 | 792 | output[1] = -x8; |
michael@0 | 793 | output[2] = x12; |
michael@0 | 794 | output[3] = -x4; |
michael@0 | 795 | output[4] = x6; |
michael@0 | 796 | output[5] = x14; |
michael@0 | 797 | output[6] = x10; |
michael@0 | 798 | output[7] = x2; |
michael@0 | 799 | output[8] = x3; |
michael@0 | 800 | output[9] = x11; |
michael@0 | 801 | output[10] = x15; |
michael@0 | 802 | output[11] = x7; |
michael@0 | 803 | output[12] = x5; |
michael@0 | 804 | output[13] = -x13; |
michael@0 | 805 | output[14] = x9; |
michael@0 | 806 | output[15] = -x1; |
michael@0 | 807 | } |
michael@0 | 808 | |
// Row/column 1-D inverse transform pairs for the 16x16 hybrid transform,
// indexed by tx_type.  .rows is applied to each row first, then .cols to
// each column (see vp9_iht16x16_256_add_c).
static const transform_2d IHT_16[] = {
  { idct16_1d, idct16_1d }, // DCT_DCT = 0
  { iadst16_1d, idct16_1d }, // ADST_DCT = 1
  { idct16_1d, iadst16_1d }, // DCT_ADST = 2
  { iadst16_1d, iadst16_1d } // ADST_ADST = 3
};
michael@0 | 815 | |
michael@0 | 816 | void vp9_iht16x16_256_add_c(const int16_t *input, uint8_t *dest, int stride, |
michael@0 | 817 | int tx_type) { |
michael@0 | 818 | int i, j; |
michael@0 | 819 | int16_t out[16 * 16]; |
michael@0 | 820 | int16_t *outptr = out; |
michael@0 | 821 | int16_t temp_in[16], temp_out[16]; |
michael@0 | 822 | const transform_2d ht = IHT_16[tx_type]; |
michael@0 | 823 | |
michael@0 | 824 | // Rows |
michael@0 | 825 | for (i = 0; i < 16; ++i) { |
michael@0 | 826 | ht.rows(input, outptr); |
michael@0 | 827 | input += 16; |
michael@0 | 828 | outptr += 16; |
michael@0 | 829 | } |
michael@0 | 830 | |
michael@0 | 831 | // Columns |
michael@0 | 832 | for (i = 0; i < 16; ++i) { |
michael@0 | 833 | for (j = 0; j < 16; ++j) |
michael@0 | 834 | temp_in[j] = out[j * 16 + i]; |
michael@0 | 835 | ht.cols(temp_in, temp_out); |
michael@0 | 836 | for (j = 0; j < 16; ++j) |
michael@0 | 837 | dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) |
michael@0 | 838 | + dest[j * stride + i]); |
michael@0 | 839 | } |
michael@0 | 840 | } |
michael@0 | 841 | |
// Inverse 16x16 DCT for sparse blocks (eob <= 10): all non-zero input
// coefficients are in the upper-left 4x4 corner, so only the first four
// rows need a row transform; the remaining rows of the intermediate stay
// zero from the initializer.
void vp9_idct16x16_10_add_c(const int16_t *input, uint8_t *dest, int stride) {
  int16_t out[16 * 16] = { 0 };
  int16_t col_in[16], col_out[16];
  int r, c;

  // Row pass (first 4 rows only -- the rest are known to be zero).
  for (r = 0; r < 4; ++r)
    idct16_1d(input + r * 16, out + r * 16);

  // Column pass: every column must still be transformed and accumulated.
  for (c = 0; c < 16; ++c) {
    for (r = 0; r < 16; ++r)
      col_in[r] = out[r * 16 + c];
    idct16_1d(col_in, col_out);
    for (r = 0; r < 16; ++r)
      dest[r * stride + c] = clip_pixel(ROUND_POWER_OF_TWO(col_out[r], 6)
                                        + dest[r * stride + c]);
  }
}
michael@0 | 866 | |
michael@0 | 867 | void vp9_idct16x16_1_add_c(const int16_t *input, uint8_t *dest, int stride) { |
michael@0 | 868 | int i, j; |
michael@0 | 869 | int a1; |
michael@0 | 870 | int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
michael@0 | 871 | out = dct_const_round_shift(out * cospi_16_64); |
michael@0 | 872 | a1 = ROUND_POWER_OF_TWO(out, 6); |
michael@0 | 873 | for (j = 0; j < 16; ++j) { |
michael@0 | 874 | for (i = 0; i < 16; ++i) |
michael@0 | 875 | dest[i] = clip_pixel(dest[i] + a1); |
michael@0 | 876 | dest += stride; |
michael@0 | 877 | } |
michael@0 | 878 | } |
michael@0 | 879 | |
// 1-D 32-point inverse DCT, used for both the row and the column passes of
// the 32x32 transform.  This is a straight-line implementation of the VP9
// butterfly flow graph: each "stage" below either reorders values, forms
// add/subtract butterflies, or rotates a pair of values by a cospi_* angle.
// Every multiply is followed by dct_const_round_shift() to bring the
// product back to transform precision.  'input' and 'output' each hold 32
// coefficients; intermediates alternate between step1[] and step2[].
static void idct32_1d(const int16_t *input, int16_t *output) {
  int16_t step1[32], step2[32];
  int temp1, temp2;

  // stage 1
  // Even-indexed inputs are only reordered here; odd-indexed inputs feed
  // the eight rotation pairs that produce step1[16..31].
  step1[0] = input[0];
  step1[1] = input[16];
  step1[2] = input[8];
  step1[3] = input[24];
  step1[4] = input[4];
  step1[5] = input[20];
  step1[6] = input[12];
  step1[7] = input[28];
  step1[8] = input[2];
  step1[9] = input[18];
  step1[10] = input[10];
  step1[11] = input[26];
  step1[12] = input[6];
  step1[13] = input[22];
  step1[14] = input[14];
  step1[15] = input[30];

  temp1 = input[1] * cospi_31_64 - input[31] * cospi_1_64;
  temp2 = input[1] * cospi_1_64 + input[31] * cospi_31_64;
  step1[16] = dct_const_round_shift(temp1);
  step1[31] = dct_const_round_shift(temp2);

  temp1 = input[17] * cospi_15_64 - input[15] * cospi_17_64;
  temp2 = input[17] * cospi_17_64 + input[15] * cospi_15_64;
  step1[17] = dct_const_round_shift(temp1);
  step1[30] = dct_const_round_shift(temp2);

  temp1 = input[9] * cospi_23_64 - input[23] * cospi_9_64;
  temp2 = input[9] * cospi_9_64 + input[23] * cospi_23_64;
  step1[18] = dct_const_round_shift(temp1);
  step1[29] = dct_const_round_shift(temp2);

  temp1 = input[25] * cospi_7_64 - input[7] * cospi_25_64;
  temp2 = input[25] * cospi_25_64 + input[7] * cospi_7_64;
  step1[19] = dct_const_round_shift(temp1);
  step1[28] = dct_const_round_shift(temp2);

  temp1 = input[5] * cospi_27_64 - input[27] * cospi_5_64;
  temp2 = input[5] * cospi_5_64 + input[27] * cospi_27_64;
  step1[20] = dct_const_round_shift(temp1);
  step1[27] = dct_const_round_shift(temp2);

  temp1 = input[21] * cospi_11_64 - input[11] * cospi_21_64;
  temp2 = input[21] * cospi_21_64 + input[11] * cospi_11_64;
  step1[21] = dct_const_round_shift(temp1);
  step1[26] = dct_const_round_shift(temp2);

  temp1 = input[13] * cospi_19_64 - input[19] * cospi_13_64;
  temp2 = input[13] * cospi_13_64 + input[19] * cospi_19_64;
  step1[22] = dct_const_round_shift(temp1);
  step1[25] = dct_const_round_shift(temp2);

  temp1 = input[29] * cospi_3_64 - input[3] * cospi_29_64;
  temp2 = input[29] * cospi_29_64 + input[3] * cospi_3_64;
  step1[23] = dct_const_round_shift(temp1);
  step1[24] = dct_const_round_shift(temp2);

  // stage 2
  // Pass 0..7 through; rotate pairs 8..15; butterfly 16..31.
  step2[0] = step1[0];
  step2[1] = step1[1];
  step2[2] = step1[2];
  step2[3] = step1[3];
  step2[4] = step1[4];
  step2[5] = step1[5];
  step2[6] = step1[6];
  step2[7] = step1[7];

  temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64;
  temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64;
  step2[8] = dct_const_round_shift(temp1);
  step2[15] = dct_const_round_shift(temp2);

  temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64;
  temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64;
  step2[9] = dct_const_round_shift(temp1);
  step2[14] = dct_const_round_shift(temp2);

  temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64;
  temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64;
  step2[10] = dct_const_round_shift(temp1);
  step2[13] = dct_const_round_shift(temp2);

  temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64;
  temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64;
  step2[11] = dct_const_round_shift(temp1);
  step2[12] = dct_const_round_shift(temp2);

  step2[16] = step1[16] + step1[17];
  step2[17] = step1[16] - step1[17];
  step2[18] = -step1[18] + step1[19];
  step2[19] = step1[18] + step1[19];
  step2[20] = step1[20] + step1[21];
  step2[21] = step1[20] - step1[21];
  step2[22] = -step1[22] + step1[23];
  step2[23] = step1[22] + step1[23];
  step2[24] = step1[24] + step1[25];
  step2[25] = step1[24] - step1[25];
  step2[26] = -step1[26] + step1[27];
  step2[27] = step1[26] + step1[27];
  step2[28] = step1[28] + step1[29];
  step2[29] = step1[28] - step1[29];
  step2[30] = -step1[30] + step1[31];
  step2[31] = step1[30] + step1[31];

  // stage 3
  // Rotate 4..7 and the 17/30, 18/29, 21/26, 22/25 pairs; butterfly 8..15.
  step1[0] = step2[0];
  step1[1] = step2[1];
  step1[2] = step2[2];
  step1[3] = step2[3];

  temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64;
  temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64;
  step1[4] = dct_const_round_shift(temp1);
  step1[7] = dct_const_round_shift(temp2);
  temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64;
  temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64;
  step1[5] = dct_const_round_shift(temp1);
  step1[6] = dct_const_round_shift(temp2);

  step1[8] = step2[8] + step2[9];
  step1[9] = step2[8] - step2[9];
  step1[10] = -step2[10] + step2[11];
  step1[11] = step2[10] + step2[11];
  step1[12] = step2[12] + step2[13];
  step1[13] = step2[12] - step2[13];
  step1[14] = -step2[14] + step2[15];
  step1[15] = step2[14] + step2[15];

  step1[16] = step2[16];
  step1[31] = step2[31];
  temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64;
  temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64;
  step1[17] = dct_const_round_shift(temp1);
  step1[30] = dct_const_round_shift(temp2);
  temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64;
  temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64;
  step1[18] = dct_const_round_shift(temp1);
  step1[29] = dct_const_round_shift(temp2);
  step1[19] = step2[19];
  step1[20] = step2[20];
  temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64;
  temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64;
  step1[21] = dct_const_round_shift(temp1);
  step1[26] = dct_const_round_shift(temp2);
  temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64;
  temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64;
  step1[22] = dct_const_round_shift(temp1);
  step1[25] = dct_const_round_shift(temp2);
  step1[23] = step2[23];
  step1[24] = step2[24];
  step1[27] = step2[27];
  step1[28] = step2[28];

  // stage 4
  temp1 = (step1[0] + step1[1]) * cospi_16_64;
  temp2 = (step1[0] - step1[1]) * cospi_16_64;
  step2[0] = dct_const_round_shift(temp1);
  step2[1] = dct_const_round_shift(temp2);
  temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64;
  temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64;
  step2[2] = dct_const_round_shift(temp1);
  step2[3] = dct_const_round_shift(temp2);
  step2[4] = step1[4] + step1[5];
  step2[5] = step1[4] - step1[5];
  step2[6] = -step1[6] + step1[7];
  step2[7] = step1[6] + step1[7];

  step2[8] = step1[8];
  step2[15] = step1[15];
  temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64;
  temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64;
  step2[9] = dct_const_round_shift(temp1);
  step2[14] = dct_const_round_shift(temp2);
  temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64;
  temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64;
  step2[10] = dct_const_round_shift(temp1);
  step2[13] = dct_const_round_shift(temp2);
  step2[11] = step1[11];
  step2[12] = step1[12];

  step2[16] = step1[16] + step1[19];
  step2[17] = step1[17] + step1[18];
  step2[18] = step1[17] - step1[18];
  step2[19] = step1[16] - step1[19];
  step2[20] = -step1[20] + step1[23];
  step2[21] = -step1[21] + step1[22];
  step2[22] = step1[21] + step1[22];
  step2[23] = step1[20] + step1[23];

  step2[24] = step1[24] + step1[27];
  step2[25] = step1[25] + step1[26];
  step2[26] = step1[25] - step1[26];
  step2[27] = step1[24] - step1[27];
  step2[28] = -step1[28] + step1[31];
  step2[29] = -step1[29] + step1[30];
  step2[30] = step1[29] + step1[30];
  step2[31] = step1[28] + step1[31];

  // stage 5
  step1[0] = step2[0] + step2[3];
  step1[1] = step2[1] + step2[2];
  step1[2] = step2[1] - step2[2];
  step1[3] = step2[0] - step2[3];
  step1[4] = step2[4];
  temp1 = (step2[6] - step2[5]) * cospi_16_64;
  temp2 = (step2[5] + step2[6]) * cospi_16_64;
  step1[5] = dct_const_round_shift(temp1);
  step1[6] = dct_const_round_shift(temp2);
  step1[7] = step2[7];

  step1[8] = step2[8] + step2[11];
  step1[9] = step2[9] + step2[10];
  step1[10] = step2[9] - step2[10];
  step1[11] = step2[8] - step2[11];
  step1[12] = -step2[12] + step2[15];
  step1[13] = -step2[13] + step2[14];
  step1[14] = step2[13] + step2[14];
  step1[15] = step2[12] + step2[15];

  step1[16] = step2[16];
  step1[17] = step2[17];
  temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64;
  temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64;
  step1[18] = dct_const_round_shift(temp1);
  step1[29] = dct_const_round_shift(temp2);
  temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64;
  temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64;
  step1[19] = dct_const_round_shift(temp1);
  step1[28] = dct_const_round_shift(temp2);
  temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64;
  temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64;
  step1[20] = dct_const_round_shift(temp1);
  step1[27] = dct_const_round_shift(temp2);
  temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64;
  temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64;
  step1[21] = dct_const_round_shift(temp1);
  step1[26] = dct_const_round_shift(temp2);
  step1[22] = step2[22];
  step1[23] = step2[23];
  step1[24] = step2[24];
  step1[25] = step2[25];
  step1[30] = step2[30];
  step1[31] = step2[31];

  // stage 6
  step2[0] = step1[0] + step1[7];
  step2[1] = step1[1] + step1[6];
  step2[2] = step1[2] + step1[5];
  step2[3] = step1[3] + step1[4];
  step2[4] = step1[3] - step1[4];
  step2[5] = step1[2] - step1[5];
  step2[6] = step1[1] - step1[6];
  step2[7] = step1[0] - step1[7];
  step2[8] = step1[8];
  step2[9] = step1[9];
  temp1 = (-step1[10] + step1[13]) * cospi_16_64;
  temp2 = (step1[10] + step1[13]) * cospi_16_64;
  step2[10] = dct_const_round_shift(temp1);
  step2[13] = dct_const_round_shift(temp2);
  temp1 = (-step1[11] + step1[12]) * cospi_16_64;
  temp2 = (step1[11] + step1[12]) * cospi_16_64;
  step2[11] = dct_const_round_shift(temp1);
  step2[12] = dct_const_round_shift(temp2);
  step2[14] = step1[14];
  step2[15] = step1[15];

  step2[16] = step1[16] + step1[23];
  step2[17] = step1[17] + step1[22];
  step2[18] = step1[18] + step1[21];
  step2[19] = step1[19] + step1[20];
  step2[20] = step1[19] - step1[20];
  step2[21] = step1[18] - step1[21];
  step2[22] = step1[17] - step1[22];
  step2[23] = step1[16] - step1[23];

  step2[24] = -step1[24] + step1[31];
  step2[25] = -step1[25] + step1[30];
  step2[26] = -step1[26] + step1[29];
  step2[27] = -step1[27] + step1[28];
  step2[28] = step1[27] + step1[28];
  step2[29] = step1[26] + step1[29];
  step2[30] = step1[25] + step1[30];
  step2[31] = step1[24] + step1[31];

  // stage 7
  step1[0] = step2[0] + step2[15];
  step1[1] = step2[1] + step2[14];
  step1[2] = step2[2] + step2[13];
  step1[3] = step2[3] + step2[12];
  step1[4] = step2[4] + step2[11];
  step1[5] = step2[5] + step2[10];
  step1[6] = step2[6] + step2[9];
  step1[7] = step2[7] + step2[8];
  step1[8] = step2[7] - step2[8];
  step1[9] = step2[6] - step2[9];
  step1[10] = step2[5] - step2[10];
  step1[11] = step2[4] - step2[11];
  step1[12] = step2[3] - step2[12];
  step1[13] = step2[2] - step2[13];
  step1[14] = step2[1] - step2[14];
  step1[15] = step2[0] - step2[15];

  step1[16] = step2[16];
  step1[17] = step2[17];
  step1[18] = step2[18];
  step1[19] = step2[19];
  temp1 = (-step2[20] + step2[27]) * cospi_16_64;
  temp2 = (step2[20] + step2[27]) * cospi_16_64;
  step1[20] = dct_const_round_shift(temp1);
  step1[27] = dct_const_round_shift(temp2);
  temp1 = (-step2[21] + step2[26]) * cospi_16_64;
  temp2 = (step2[21] + step2[26]) * cospi_16_64;
  step1[21] = dct_const_round_shift(temp1);
  step1[26] = dct_const_round_shift(temp2);
  temp1 = (-step2[22] + step2[25]) * cospi_16_64;
  temp2 = (step2[22] + step2[25]) * cospi_16_64;
  step1[22] = dct_const_round_shift(temp1);
  step1[25] = dct_const_round_shift(temp2);
  temp1 = (-step2[23] + step2[24]) * cospi_16_64;
  temp2 = (step2[23] + step2[24]) * cospi_16_64;
  step1[23] = dct_const_round_shift(temp1);
  step1[24] = dct_const_round_shift(temp2);
  step1[28] = step2[28];
  step1[29] = step2[29];
  step1[30] = step2[30];
  step1[31] = step2[31];

  // final stage
  // Mirror butterfly: output[i] and output[31-i] are the sum/difference of
  // the same pair of stage-7 values.
  output[0] = step1[0] + step1[31];
  output[1] = step1[1] + step1[30];
  output[2] = step1[2] + step1[29];
  output[3] = step1[3] + step1[28];
  output[4] = step1[4] + step1[27];
  output[5] = step1[5] + step1[26];
  output[6] = step1[6] + step1[25];
  output[7] = step1[7] + step1[24];
  output[8] = step1[8] + step1[23];
  output[9] = step1[9] + step1[22];
  output[10] = step1[10] + step1[21];
  output[11] = step1[11] + step1[20];
  output[12] = step1[12] + step1[19];
  output[13] = step1[13] + step1[18];
  output[14] = step1[14] + step1[17];
  output[15] = step1[15] + step1[16];
  output[16] = step1[15] - step1[16];
  output[17] = step1[14] - step1[17];
  output[18] = step1[13] - step1[18];
  output[19] = step1[12] - step1[19];
  output[20] = step1[11] - step1[20];
  output[21] = step1[10] - step1[21];
  output[22] = step1[9] - step1[22];
  output[23] = step1[8] - step1[23];
  output[24] = step1[7] - step1[24];
  output[25] = step1[6] - step1[25];
  output[26] = step1[5] - step1[26];
  output[27] = step1[4] - step1[27];
  output[28] = step1[3] - step1[28];
  output[29] = step1[2] - step1[29];
  output[30] = step1[1] - step1[30];
  output[31] = step1[0] - step1[31];
}
michael@0 | 1246 | |
// Full inverse 32x32 DCT: transform every row, then every column, then
// round by 6 bits and accumulate into the destination pixels.
void vp9_idct32x32_1024_add_c(const int16_t *input, uint8_t *dest, int stride) {
  int16_t out[32 * 32];
  int16_t col_in[32], col_out[32];
  int r, c;

  // Row pass.  Rows that are entirely zero (common in practice) skip the
  // transform and just zero their slice of the intermediate buffer.
  for (r = 0; r < 32; ++r) {
    int any_nonzero = 0;
    for (c = 0; c < 32; ++c)
      any_nonzero |= input[c];
    if (any_nonzero)
      idct32_1d(input, out + r * 32);
    else
      vpx_memset(out + r * 32, 0, sizeof(int16_t) * 32);
    input += 32;
  }

  // Column pass.
  for (c = 0; c < 32; ++c) {
    for (r = 0; r < 32; ++r)
      col_in[r] = out[r * 32 + c];
    idct32_1d(col_in, col_out);
    for (r = 0; r < 32; ++r)
      dest[r * stride + c] = clip_pixel(ROUND_POWER_OF_TWO(col_out[r], 6)
                                        + dest[r * stride + c]);
  }
}
michael@0 | 1283 | |
// Inverse 32x32 DCT for sparse blocks (eob <= 34): every non-zero input
// coefficient lives in the upper-left 8x8 corner, so only the first eight
// rows need a row transform; the rest of the intermediate stays zero from
// the initializer.
void vp9_idct32x32_34_add_c(const int16_t *input, uint8_t *dest, int stride) {
  int16_t out[32 * 32] = {0};
  int16_t col_in[32], col_out[32];
  int r, c;

  // Row pass (first 8 rows only).
  for (r = 0; r < 8; ++r)
    idct32_1d(input + r * 32, out + r * 32);

  // Column pass: all 32 columns are transformed and accumulated.
  for (c = 0; c < 32; ++c) {
    for (r = 0; r < 32; ++r)
      col_in[r] = out[r * 32 + c];
    idct32_1d(col_in, col_out);
    for (r = 0; r < 32; ++r)
      dest[r * stride + c] = clip_pixel(ROUND_POWER_OF_TWO(col_out[r], 6)
                                        + dest[r * stride + c]);
  }
}
michael@0 | 1308 | |
michael@0 | 1309 | void vp9_idct32x32_1_add_c(const int16_t *input, uint8_t *dest, int stride) { |
michael@0 | 1310 | int i, j; |
michael@0 | 1311 | int a1; |
michael@0 | 1312 | |
michael@0 | 1313 | int16_t out = dct_const_round_shift(input[0] * cospi_16_64); |
michael@0 | 1314 | out = dct_const_round_shift(out * cospi_16_64); |
michael@0 | 1315 | a1 = ROUND_POWER_OF_TWO(out, 6); |
michael@0 | 1316 | |
michael@0 | 1317 | for (j = 0; j < 32; ++j) { |
michael@0 | 1318 | for (i = 0; i < 32; ++i) |
michael@0 | 1319 | dest[i] = clip_pixel(dest[i] + a1); |
michael@0 | 1320 | dest += stride; |
michael@0 | 1321 | } |
michael@0 | 1322 | } |
michael@0 | 1323 | |
michael@0 | 1324 | // idct |
// Inverse 4x4 DCT dispatch: eob <= 1 means at most a DC coefficient, which
// takes the cheaper dedicated path; otherwise run the full 16-coefficient
// transform.
void vp9_idct4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
  if (eob <= 1) {
    vp9_idct4x4_1_add(input, dest, stride);
  } else {
    vp9_idct4x4_16_add(input, dest, stride);
  }
}
michael@0 | 1331 | |
michael@0 | 1332 | |
void vp9_iwht4x4_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
  // Walsh-Hadamard dispatcher: DC-only blocks (eob <= 1) use the fast
  // single-coefficient path; otherwise run the full inverse WHT.
  if (eob <= 1)
    vp9_iwht4x4_1_add(input, dest, stride);
  else
    vp9_iwht4x4_16_add(input, dest, stride);
}
michael@0 | 1339 | |
void vp9_idct8x8_add(const int16_t *input, uint8_t *dest, int stride, int eob) {
  // If dc is 1, then input[0] is the reconstructed value, do not need
  // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.

  // The calculation can be simplified if there are not many non-zero dct
  // coefficients. Use eobs to decide what to do.
  // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
  // Combine that with code here.
  if (eob == 0)
    return;  // All-zero block: nothing to add to dest.

  if (eob == 1) {
    // DC only DCT coefficient
    vp9_idct8x8_1_add(input, dest, stride);
  } else if (eob <= 10) {
    // Few coefficients: they all fit in the low-frequency corner.
    vp9_idct8x8_10_add(input, dest, stride);
  } else {
    vp9_idct8x8_64_add(input, dest, stride);
  }
}
michael@0 | 1358 | |
void vp9_idct16x16_add(const int16_t *input, uint8_t *dest, int stride,
                       int eob) {
  /* The calculation can be simplified if there are not many non-zero dct
   * coefficients. Use eobs to separate different cases. */
  if (eob == 0)
    return;  /* All-zero block: dest is left untouched. */

  if (eob == 1) {
    /* DC only DCT coefficient. */
    vp9_idct16x16_1_add(input, dest, stride);
  } else if (eob <= 10) {
    /* Few coefficients: use the reduced low-frequency transform. */
    vp9_idct16x16_10_add(input, dest, stride);
  } else {
    vp9_idct16x16_256_add(input, dest, stride);
  }
}
michael@0 | 1373 | |
void vp9_idct32x32_add(const int16_t *input, uint8_t *dest, int stride,
                       int eob) {
  // Dispatch on the number of non-zero coefficients (eob).
  if (eob == 0)
    return;  // All-zero block: nothing to do.

  if (eob == 1) {
    vp9_idct32x32_1_add(input, dest, stride);
  } else if (eob <= 34) {
    // non-zero coeff only in upper-left 8x8
    vp9_idct32x32_34_add(input, dest, stride);
  } else {
    vp9_idct32x32_1024_add(input, dest, stride);
  }
}
michael@0 | 1386 | |
michael@0 | 1387 | // iht |
michael@0 | 1388 | void vp9_iht4x4_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
michael@0 | 1389 | int stride, int eob) { |
michael@0 | 1390 | if (tx_type == DCT_DCT) |
michael@0 | 1391 | vp9_idct4x4_add(input, dest, stride, eob); |
michael@0 | 1392 | else |
michael@0 | 1393 | vp9_iht4x4_16_add(input, dest, stride, tx_type); |
michael@0 | 1394 | } |
michael@0 | 1395 | |
michael@0 | 1396 | void vp9_iht8x8_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
michael@0 | 1397 | int stride, int eob) { |
michael@0 | 1398 | if (tx_type == DCT_DCT) { |
michael@0 | 1399 | vp9_idct8x8_add(input, dest, stride, eob); |
michael@0 | 1400 | } else { |
michael@0 | 1401 | if (eob > 0) { |
michael@0 | 1402 | vp9_iht8x8_64_add(input, dest, stride, tx_type); |
michael@0 | 1403 | } |
michael@0 | 1404 | } |
michael@0 | 1405 | } |
michael@0 | 1406 | |
michael@0 | 1407 | void vp9_iht16x16_add(TX_TYPE tx_type, const int16_t *input, uint8_t *dest, |
michael@0 | 1408 | int stride, int eob) { |
michael@0 | 1409 | if (tx_type == DCT_DCT) { |
michael@0 | 1410 | vp9_idct16x16_add(input, dest, stride, eob); |
michael@0 | 1411 | } else { |
michael@0 | 1412 | if (eob > 0) { |
michael@0 | 1413 | vp9_iht16x16_256_add(input, dest, stride, tx_type); |
michael@0 | 1414 | } |
michael@0 | 1415 | } |
michael@0 | 1416 | } |