Thu, 22 Jan 2015 13:21:57 +0100
Incorporate requested changes from Mozilla in review:
https://bugzilla.mozilla.org/show_bug.cgi?id=1123480#c6
1 /*
2 * jsimd_arm.c
3 *
4 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
5 * Copyright 2009-2011 D. R. Commander
6 *
7 * Based on the x86 SIMD extension for IJG JPEG library,
8 * Copyright (C) 1999-2006, MIYASAKA Masaru.
9 * For conditions of distribution and use, see copyright notice in jsimdext.inc
10 *
11 * This file contains the interface between the "normal" portions
12 * of the library and the SIMD implementations when running on
13 * ARM architecture.
14 *
15 * Based on the stubs from 'jsimd_none.c'
16 */
18 #define JPEG_INTERNALS
19 #include "../jinclude.h"
20 #include "../jpeglib.h"
21 #include "../jsimd.h"
22 #include "../jdct.h"
23 #include "../jsimddct.h"
24 #include "jsimd.h"
26 #include <stdio.h>
27 #include <string.h>
28 #include <ctype.h>
30 static unsigned int simd_support = ~0;
32 #if defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
34 #define SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT (1024 * 1024)
/*
 * Report whether 'feature' occurs as a separate, whitespace-delimited word
 * on a "Features" line.
 *
 * buffer   One NUL-terminated line of text.
 * feature  The NUL-terminated word to look for.
 *
 * Returns 1 when 'buffer' begins with "Features" and 'feature' appears after
 * it as a whole word; returns 0 otherwise, including when 'feature' is the
 * empty string.
 */
LOCAL(int)
check_feature (char *buffer, char *feature)
{
  char *start, *p;
  size_t len = strlen(feature);

  if (len == 0)
    return 0;
  if (strncmp(buffer, "Features", 8) != 0)
    return 0;
  buffer += 8;
  /* <ctype.h> classifiers need a value representable as unsigned char */
  while (isspace((unsigned char) *buffer))
    buffer++;

  /* Check if 'feature' is present in the buffer as a separate word.
   * The left-hand boundary test is always made against the fixed 'start';
   * restarting the search one character further along and comparing against
   * the moved pointer would let a suffix of a longer word (e.g. "fooneon")
   * pass as a match once the search window begins exactly at that suffix. */
  start = buffer;
  for (p = strstr(start, feature); p != NULL; p = strstr(p + 1, feature)) {
    if (p > start && !isspace((unsigned char) p[-1]))
      continue;                 /* glued to a preceding character */
    if (p[len] != 0 && !isspace((unsigned char) p[len]))
      continue;                 /* glued to a following character */
    return 1;
  }
  return 0;
}
64 LOCAL(int)
65 parse_proc_cpuinfo (int bufsize)
66 {
67 char *buffer = (char *)malloc(bufsize);
68 FILE *fd;
69 simd_support = 0;
71 if (!buffer)
72 return 0;
74 fd = fopen("/proc/cpuinfo", "r");
75 if (fd) {
76 while (fgets(buffer, bufsize, fd)) {
77 if (!strchr(buffer, '\n') && !feof(fd)) {
78 /* "impossible" happened - insufficient size of the buffer! */
79 fclose(fd);
80 free(buffer);
81 return 0;
82 }
83 if (check_feature(buffer, "neon"))
84 simd_support |= JSIMD_ARM_NEON;
85 }
86 fclose(fd);
87 }
88 free(buffer);
89 return 1;
90 }
92 #endif
94 /*
95 * Check what SIMD accelerations are supported.
96 *
97 * FIXME: This code is racy under a multi-threaded environment.
98 */
99 LOCAL(void)
100 init_simd (void)
101 {
102 char *env = NULL;
103 #if !defined(__ARM_NEON__) && defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
104 int bufsize = 1024; /* an initial guess for the line buffer size limit */
105 #endif
107 if (simd_support != ~0U)
108 return;
110 simd_support = 0;
112 #if defined(__ARM_NEON__)
113 simd_support |= JSIMD_ARM_NEON;
114 #elif defined(__linux__) || defined(ANDROID) || defined(__ANDROID__)
115 /* We still have a chance to use NEON regardless of globally used
116 * -mcpu/-mfpu options passed to gcc by performing runtime detection via
117 * /proc/cpuinfo parsing on linux/android */
118 while (!parse_proc_cpuinfo(bufsize)) {
119 bufsize *= 2;
120 if (bufsize > SOMEWHAT_SANE_PROC_CPUINFO_SIZE_LIMIT)
121 break;
122 }
123 #endif
125 /* Force different settings through environment variables */
126 env = getenv("JSIMD_FORCE_ARM_NEON");
127 if ((env != NULL) && (strcmp(env, "1") == 0))
128 simd_support &= JSIMD_ARM_NEON;
129 env = getenv("JSIMD_FORCE_NO_SIMD");
130 if ((env != NULL) && (strcmp(env, "1") == 0))
131 simd_support = 0;
132 }
134 GLOBAL(int)
135 jsimd_can_rgb_ycc (void)
136 {
137 init_simd();
139 /* The code is optimised for these values only */
140 if (BITS_IN_JSAMPLE != 8)
141 return 0;
142 if (sizeof(JDIMENSION) != 4)
143 return 0;
144 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
145 return 0;
147 if (simd_support & JSIMD_ARM_NEON)
148 return 1;
150 return 0;
151 }
153 GLOBAL(int)
154 jsimd_can_rgb_gray (void)
155 {
156 init_simd();
158 return 0;
159 }
161 GLOBAL(int)
162 jsimd_can_ycc_rgb (void)
163 {
164 init_simd();
166 /* The code is optimised for these values only */
167 if (BITS_IN_JSAMPLE != 8)
168 return 0;
169 if (sizeof(JDIMENSION) != 4)
170 return 0;
171 if ((RGB_PIXELSIZE != 3) && (RGB_PIXELSIZE != 4))
172 return 0;
173 if (simd_support & JSIMD_ARM_NEON)
174 return 1;
176 return 0;
177 }
179 GLOBAL(void)
180 jsimd_rgb_ycc_convert (j_compress_ptr cinfo,
181 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
182 JDIMENSION output_row, int num_rows)
183 {
184 void (*neonfct)(JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int);
186 switch(cinfo->in_color_space)
187 {
188 case JCS_EXT_RGB:
189 neonfct=jsimd_extrgb_ycc_convert_neon;
190 break;
191 case JCS_EXT_RGBX:
192 case JCS_EXT_RGBA:
193 neonfct=jsimd_extrgbx_ycc_convert_neon;
194 break;
195 case JCS_EXT_BGR:
196 neonfct=jsimd_extbgr_ycc_convert_neon;
197 break;
198 case JCS_EXT_BGRX:
199 case JCS_EXT_BGRA:
200 neonfct=jsimd_extbgrx_ycc_convert_neon;
201 break;
202 case JCS_EXT_XBGR:
203 case JCS_EXT_ABGR:
204 neonfct=jsimd_extxbgr_ycc_convert_neon;
205 break;
206 case JCS_EXT_XRGB:
207 case JCS_EXT_ARGB:
208 neonfct=jsimd_extxrgb_ycc_convert_neon;
209 break;
210 default:
211 neonfct=jsimd_extrgb_ycc_convert_neon;
212 break;
213 }
215 if (simd_support & JSIMD_ARM_NEON)
216 neonfct(cinfo->image_width, input_buf,
217 output_buf, output_row, num_rows);
218 }
220 GLOBAL(void)
221 jsimd_rgb_gray_convert (j_compress_ptr cinfo,
222 JSAMPARRAY input_buf, JSAMPIMAGE output_buf,
223 JDIMENSION output_row, int num_rows)
224 {
225 }
227 GLOBAL(void)
228 jsimd_ycc_rgb_convert (j_decompress_ptr cinfo,
229 JSAMPIMAGE input_buf, JDIMENSION input_row,
230 JSAMPARRAY output_buf, int num_rows)
231 {
232 void (*neonfct)(JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int);
234 switch(cinfo->out_color_space)
235 {
236 case JCS_EXT_RGB:
237 neonfct=jsimd_ycc_extrgb_convert_neon;
238 break;
239 case JCS_EXT_RGBX:
240 case JCS_EXT_RGBA:
241 neonfct=jsimd_ycc_extrgbx_convert_neon;
242 break;
243 case JCS_EXT_BGR:
244 neonfct=jsimd_ycc_extbgr_convert_neon;
245 break;
246 case JCS_EXT_BGRX:
247 case JCS_EXT_BGRA:
248 neonfct=jsimd_ycc_extbgrx_convert_neon;
249 break;
250 case JCS_EXT_XBGR:
251 case JCS_EXT_ABGR:
252 neonfct=jsimd_ycc_extxbgr_convert_neon;
253 break;
254 case JCS_EXT_XRGB:
255 case JCS_EXT_ARGB:
256 neonfct=jsimd_ycc_extxrgb_convert_neon;
257 break;
258 default:
259 neonfct=jsimd_ycc_extrgb_convert_neon;
260 break;
261 }
263 if (simd_support & JSIMD_ARM_NEON)
264 neonfct(cinfo->output_width, input_buf,
265 input_row, output_buf, num_rows);
266 }
268 GLOBAL(int)
269 jsimd_can_h2v2_downsample (void)
270 {
271 init_simd();
273 return 0;
274 }
276 GLOBAL(int)
277 jsimd_can_h2v1_downsample (void)
278 {
279 init_simd();
281 return 0;
282 }
284 GLOBAL(void)
285 jsimd_h2v2_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
286 JSAMPARRAY input_data, JSAMPARRAY output_data)
287 {
288 }
290 GLOBAL(void)
291 jsimd_h2v1_downsample (j_compress_ptr cinfo, jpeg_component_info * compptr,
292 JSAMPARRAY input_data, JSAMPARRAY output_data)
293 {
294 }
296 GLOBAL(int)
297 jsimd_can_h2v2_upsample (void)
298 {
299 init_simd();
301 return 0;
302 }
304 GLOBAL(int)
305 jsimd_can_h2v1_upsample (void)
306 {
307 init_simd();
309 return 0;
310 }
312 GLOBAL(void)
313 jsimd_h2v2_upsample (j_decompress_ptr cinfo,
314 jpeg_component_info * compptr,
315 JSAMPARRAY input_data,
316 JSAMPARRAY * output_data_ptr)
317 {
318 }
320 GLOBAL(void)
321 jsimd_h2v1_upsample (j_decompress_ptr cinfo,
322 jpeg_component_info * compptr,
323 JSAMPARRAY input_data,
324 JSAMPARRAY * output_data_ptr)
325 {
326 }
328 GLOBAL(int)
329 jsimd_can_h2v2_fancy_upsample (void)
330 {
331 init_simd();
333 return 0;
334 }
336 GLOBAL(int)
337 jsimd_can_h2v1_fancy_upsample (void)
338 {
339 init_simd();
341 /* The code is optimised for these values only */
342 if (BITS_IN_JSAMPLE != 8)
343 return 0;
344 if (sizeof(JDIMENSION) != 4)
345 return 0;
347 if (simd_support & JSIMD_ARM_NEON)
348 return 1;
350 return 0;
351 }
353 GLOBAL(void)
354 jsimd_h2v2_fancy_upsample (j_decompress_ptr cinfo,
355 jpeg_component_info * compptr,
356 JSAMPARRAY input_data,
357 JSAMPARRAY * output_data_ptr)
358 {
359 }
361 GLOBAL(void)
362 jsimd_h2v1_fancy_upsample (j_decompress_ptr cinfo,
363 jpeg_component_info * compptr,
364 JSAMPARRAY input_data,
365 JSAMPARRAY * output_data_ptr)
366 {
367 if (simd_support & JSIMD_ARM_NEON)
368 jsimd_h2v1_fancy_upsample_neon(cinfo->max_v_samp_factor,
369 compptr->downsampled_width, input_data, output_data_ptr);
370 }
372 GLOBAL(int)
373 jsimd_can_h2v2_merged_upsample (void)
374 {
375 init_simd();
377 return 0;
378 }
380 GLOBAL(int)
381 jsimd_can_h2v1_merged_upsample (void)
382 {
383 init_simd();
385 return 0;
386 }
388 GLOBAL(void)
389 jsimd_h2v2_merged_upsample (j_decompress_ptr cinfo,
390 JSAMPIMAGE input_buf,
391 JDIMENSION in_row_group_ctr,
392 JSAMPARRAY output_buf)
393 {
394 }
396 GLOBAL(void)
397 jsimd_h2v1_merged_upsample (j_decompress_ptr cinfo,
398 JSAMPIMAGE input_buf,
399 JDIMENSION in_row_group_ctr,
400 JSAMPARRAY output_buf)
401 {
402 }
404 GLOBAL(int)
405 jsimd_can_convsamp (void)
406 {
407 init_simd();
409 /* The code is optimised for these values only */
410 if (DCTSIZE != 8)
411 return 0;
412 if (BITS_IN_JSAMPLE != 8)
413 return 0;
414 if (sizeof(JDIMENSION) != 4)
415 return 0;
416 if (sizeof(DCTELEM) != 2)
417 return 0;
419 if (simd_support & JSIMD_ARM_NEON)
420 return 1;
422 return 0;
423 }
425 GLOBAL(int)
426 jsimd_can_convsamp_float (void)
427 {
428 init_simd();
430 return 0;
431 }
433 GLOBAL(void)
434 jsimd_convsamp (JSAMPARRAY sample_data, JDIMENSION start_col,
435 DCTELEM * workspace)
436 {
437 if (simd_support & JSIMD_ARM_NEON)
438 jsimd_convsamp_neon(sample_data, start_col, workspace);
439 }
441 GLOBAL(void)
442 jsimd_convsamp_float (JSAMPARRAY sample_data, JDIMENSION start_col,
443 FAST_FLOAT * workspace)
444 {
445 }
447 GLOBAL(int)
448 jsimd_can_fdct_islow (void)
449 {
450 init_simd();
452 return 0;
453 }
455 GLOBAL(int)
456 jsimd_can_fdct_ifast (void)
457 {
458 init_simd();
460 /* The code is optimised for these values only */
461 if (DCTSIZE != 8)
462 return 0;
463 if (sizeof(DCTELEM) != 2)
464 return 0;
466 if (simd_support & JSIMD_ARM_NEON)
467 return 1;
469 return 0;
470 }
472 GLOBAL(int)
473 jsimd_can_fdct_float (void)
474 {
475 init_simd();
477 return 0;
478 }
480 GLOBAL(void)
481 jsimd_fdct_islow (DCTELEM * data)
482 {
483 }
485 GLOBAL(void)
486 jsimd_fdct_ifast (DCTELEM * data)
487 {
488 if (simd_support & JSIMD_ARM_NEON)
489 jsimd_fdct_ifast_neon(data);
490 }
492 GLOBAL(void)
493 jsimd_fdct_float (FAST_FLOAT * data)
494 {
495 }
497 GLOBAL(int)
498 jsimd_can_quantize (void)
499 {
500 init_simd();
502 /* The code is optimised for these values only */
503 if (DCTSIZE != 8)
504 return 0;
505 if (sizeof(JCOEF) != 2)
506 return 0;
507 if (sizeof(DCTELEM) != 2)
508 return 0;
510 if (simd_support & JSIMD_ARM_NEON)
511 return 1;
513 return 0;
514 }
516 GLOBAL(int)
517 jsimd_can_quantize_float (void)
518 {
519 init_simd();
521 return 0;
522 }
524 GLOBAL(void)
525 jsimd_quantize (JCOEFPTR coef_block, DCTELEM * divisors,
526 DCTELEM * workspace)
527 {
528 if (simd_support & JSIMD_ARM_NEON)
529 jsimd_quantize_neon(coef_block, divisors, workspace);
530 }
532 GLOBAL(void)
533 jsimd_quantize_float (JCOEFPTR coef_block, FAST_FLOAT * divisors,
534 FAST_FLOAT * workspace)
535 {
536 }
538 GLOBAL(int)
539 jsimd_can_idct_2x2 (void)
540 {
541 init_simd();
543 /* The code is optimised for these values only */
544 if (DCTSIZE != 8)
545 return 0;
546 if (sizeof(JCOEF) != 2)
547 return 0;
548 if (BITS_IN_JSAMPLE != 8)
549 return 0;
550 if (sizeof(JDIMENSION) != 4)
551 return 0;
552 if (sizeof(ISLOW_MULT_TYPE) != 2)
553 return 0;
555 if ((simd_support & JSIMD_ARM_NEON))
556 return 1;
558 return 0;
559 }
561 GLOBAL(int)
562 jsimd_can_idct_4x4 (void)
563 {
564 init_simd();
566 /* The code is optimised for these values only */
567 if (DCTSIZE != 8)
568 return 0;
569 if (sizeof(JCOEF) != 2)
570 return 0;
571 if (BITS_IN_JSAMPLE != 8)
572 return 0;
573 if (sizeof(JDIMENSION) != 4)
574 return 0;
575 if (sizeof(ISLOW_MULT_TYPE) != 2)
576 return 0;
578 if ((simd_support & JSIMD_ARM_NEON))
579 return 1;
581 return 0;
582 }
584 GLOBAL(void)
585 jsimd_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
586 JCOEFPTR coef_block, JSAMPARRAY output_buf,
587 JDIMENSION output_col)
588 {
589 if ((simd_support & JSIMD_ARM_NEON))
590 jsimd_idct_2x2_neon(compptr->dct_table, coef_block, output_buf, output_col);
591 }
593 GLOBAL(void)
594 jsimd_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
595 JCOEFPTR coef_block, JSAMPARRAY output_buf,
596 JDIMENSION output_col)
597 {
598 if ((simd_support & JSIMD_ARM_NEON))
599 jsimd_idct_4x4_neon(compptr->dct_table, coef_block, output_buf, output_col);
600 }
602 GLOBAL(int)
603 jsimd_can_idct_islow (void)
604 {
605 init_simd();
607 /* The code is optimised for these values only */
608 if (DCTSIZE != 8)
609 return 0;
610 if (sizeof(JCOEF) != 2)
611 return 0;
612 if (BITS_IN_JSAMPLE != 8)
613 return 0;
614 if (sizeof(JDIMENSION) != 4)
615 return 0;
616 if (sizeof(ISLOW_MULT_TYPE) != 2)
617 return 0;
619 if (simd_support & JSIMD_ARM_NEON)
620 return 1;
622 return 0;
623 }
625 GLOBAL(int)
626 jsimd_can_idct_ifast (void)
627 {
628 init_simd();
630 /* The code is optimised for these values only */
631 if (DCTSIZE != 8)
632 return 0;
633 if (sizeof(JCOEF) != 2)
634 return 0;
635 if (BITS_IN_JSAMPLE != 8)
636 return 0;
637 if (sizeof(JDIMENSION) != 4)
638 return 0;
639 if (sizeof(IFAST_MULT_TYPE) != 2)
640 return 0;
641 if (IFAST_SCALE_BITS != 2)
642 return 0;
644 if ((simd_support & JSIMD_ARM_NEON))
645 return 1;
647 return 0;
648 }
650 GLOBAL(int)
651 jsimd_can_idct_float (void)
652 {
653 init_simd();
655 return 0;
656 }
658 GLOBAL(void)
659 jsimd_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
660 JCOEFPTR coef_block, JSAMPARRAY output_buf,
661 JDIMENSION output_col)
662 {
663 if ((simd_support & JSIMD_ARM_NEON))
664 jsimd_idct_islow_neon(compptr->dct_table, coef_block, output_buf, output_col);
665 }
667 GLOBAL(void)
668 jsimd_idct_ifast (j_decompress_ptr cinfo, jpeg_component_info * compptr,
669 JCOEFPTR coef_block, JSAMPARRAY output_buf,
670 JDIMENSION output_col)
671 {
672 if ((simd_support & JSIMD_ARM_NEON))
673 jsimd_idct_ifast_neon(compptr->dct_table, coef_block, output_buf, output_col);
674 }
676 GLOBAL(void)
677 jsimd_idct_float (j_decompress_ptr cinfo, jpeg_component_info * compptr,
678 JCOEFPTR coef_block, JSAMPARRAY output_buf,
679 JDIMENSION output_col)
680 {
681 }