diff --git a/jpeg/fltk_jpeg_prefix.h b/jpeg/fltk_jpeg_prefix.h new file mode 100644 index 000000000..520640a1f --- /dev/null +++ b/jpeg/fltk_jpeg_prefix.h @@ -0,0 +1,173 @@ +/* + * FLTK symbol prefixes for libjpeg + * This is a generated file: see README.bundled-libs.txt +*/ +#define jcopy_block_row fltk_jcopy_block_row +#define jcopy_sample_rows fltk_jcopy_sample_rows +#define jdiv_round_up fltk_jdiv_round_up +#define jinit_1pass_quantizer fltk_jinit_1pass_quantizer +#define jinit_2pass_quantizer fltk_jinit_2pass_quantizer +#define jinit_arith_decoder fltk_jinit_arith_decoder +#define jinit_arith_encoder fltk_jinit_arith_encoder +#define jinit_c_coef_controller fltk_jinit_c_coef_controller +#define jinit_c_main_controller fltk_jinit_c_main_controller +#define jinit_c_master_control fltk_jinit_c_master_control +#define jinit_color_converter fltk_jinit_color_converter +#define jinit_color_deconverter fltk_jinit_color_deconverter +#define jinit_compress_master fltk_jinit_compress_master +#define jinit_c_prep_controller fltk_jinit_c_prep_controller +#define jinit_d_coef_controller fltk_jinit_d_coef_controller +#define jinit_d_main_controller fltk_jinit_d_main_controller +#define jinit_downsampler fltk_jinit_downsampler +#define jinit_d_post_controller fltk_jinit_d_post_controller +#define jinit_forward_dct fltk_jinit_forward_dct +#define jinit_huff_decoder fltk_jinit_huff_decoder +#define jinit_huff_encoder fltk_jinit_huff_encoder +#define jinit_input_controller fltk_jinit_input_controller +#define jinit_inverse_dct fltk_jinit_inverse_dct +#define jinit_marker_reader fltk_jinit_marker_reader +#define jinit_marker_writer fltk_jinit_marker_writer +#define jinit_master_decompress fltk_jinit_master_decompress +#define jinit_memory_mgr fltk_jinit_memory_mgr +#define jinit_merged_upsampler fltk_jinit_merged_upsampler +#define jinit_upsampler fltk_jinit_upsampler +#define jpeg_abort fltk_jpeg_abort +#define jpeg_abort_compress fltk_jpeg_abort_compress +#define 
jpeg_abort_decompress fltk_jpeg_abort_decompress +#define jpeg_add_quant_table fltk_jpeg_add_quant_table +#define jpeg_alloc_huff_table fltk_jpeg_alloc_huff_table +#define jpeg_alloc_quant_table fltk_jpeg_alloc_quant_table +#define jpeg_aritab fltk_jpeg_aritab +#define jpeg_calc_jpeg_dimensions fltk_jpeg_calc_jpeg_dimensions +#define jpeg_calc_output_dimensions fltk_jpeg_calc_output_dimensions +#define jpeg_consume_input fltk_jpeg_consume_input +#define jpeg_copy_critical_parameters fltk_jpeg_copy_critical_parameters +#define jpeg_core_output_dimensions fltk_jpeg_core_output_dimensions +#define jpeg_CreateCompress fltk_jpeg_CreateCompress +#define jpeg_CreateDecompress fltk_jpeg_CreateDecompress +#define jpeg_default_colorspace fltk_jpeg_default_colorspace +#define jpeg_default_qtables fltk_jpeg_default_qtables +#define jpeg_destroy fltk_jpeg_destroy +#define jpeg_destroy_compress fltk_jpeg_destroy_compress +#define jpeg_destroy_decompress fltk_jpeg_destroy_decompress +#define jpeg_fdct_10x10 fltk_jpeg_fdct_10x10 +#define jpeg_fdct_10x5 fltk_jpeg_fdct_10x5 +#define jpeg_fdct_11x11 fltk_jpeg_fdct_11x11 +#define jpeg_fdct_12x12 fltk_jpeg_fdct_12x12 +#define jpeg_fdct_12x6 fltk_jpeg_fdct_12x6 +#define jpeg_fdct_13x13 fltk_jpeg_fdct_13x13 +#define jpeg_fdct_14x14 fltk_jpeg_fdct_14x14 +#define jpeg_fdct_14x7 fltk_jpeg_fdct_14x7 +#define jpeg_fdct_15x15 fltk_jpeg_fdct_15x15 +#define jpeg_fdct_16x16 fltk_jpeg_fdct_16x16 +#define jpeg_fdct_16x8 fltk_jpeg_fdct_16x8 +#define jpeg_fdct_1x1 fltk_jpeg_fdct_1x1 +#define jpeg_fdct_1x2 fltk_jpeg_fdct_1x2 +#define jpeg_fdct_2x1 fltk_jpeg_fdct_2x1 +#define jpeg_fdct_2x2 fltk_jpeg_fdct_2x2 +#define jpeg_fdct_2x4 fltk_jpeg_fdct_2x4 +#define jpeg_fdct_3x3 fltk_jpeg_fdct_3x3 +#define jpeg_fdct_3x6 fltk_jpeg_fdct_3x6 +#define jpeg_fdct_4x2 fltk_jpeg_fdct_4x2 +#define jpeg_fdct_4x4 fltk_jpeg_fdct_4x4 +#define jpeg_fdct_4x8 fltk_jpeg_fdct_4x8 +#define jpeg_fdct_5x10 fltk_jpeg_fdct_5x10 +#define jpeg_fdct_5x5 fltk_jpeg_fdct_5x5 +#define 
jpeg_fdct_6x12 fltk_jpeg_fdct_6x12 +#define jpeg_fdct_6x3 fltk_jpeg_fdct_6x3 +#define jpeg_fdct_6x6 fltk_jpeg_fdct_6x6 +#define jpeg_fdct_7x14 fltk_jpeg_fdct_7x14 +#define jpeg_fdct_7x7 fltk_jpeg_fdct_7x7 +#define jpeg_fdct_8x16 fltk_jpeg_fdct_8x16 +#define jpeg_fdct_8x4 fltk_jpeg_fdct_8x4 +#define jpeg_fdct_9x9 fltk_jpeg_fdct_9x9 +#define jpeg_fdct_float fltk_jpeg_fdct_float +#define jpeg_fdct_ifast fltk_jpeg_fdct_ifast +#define jpeg_fdct_islow fltk_jpeg_fdct_islow +#define jpeg_finish_compress fltk_jpeg_finish_compress +#define jpeg_finish_decompress fltk_jpeg_finish_decompress +#define jpeg_finish_output fltk_jpeg_finish_output +#define jpeg_free_large fltk_jpeg_free_large +#define jpeg_free_small fltk_jpeg_free_small +#define jpeg_get_large fltk_jpeg_get_large +#define jpeg_get_small fltk_jpeg_get_small +#define jpeg_has_multiple_scans fltk_jpeg_has_multiple_scans +#define jpeg_idct_10x10 fltk_jpeg_idct_10x10 +#define jpeg_idct_10x5 fltk_jpeg_idct_10x5 +#define jpeg_idct_11x11 fltk_jpeg_idct_11x11 +#define jpeg_idct_12x12 fltk_jpeg_idct_12x12 +#define jpeg_idct_12x6 fltk_jpeg_idct_12x6 +#define jpeg_idct_13x13 fltk_jpeg_idct_13x13 +#define jpeg_idct_14x14 fltk_jpeg_idct_14x14 +#define jpeg_idct_14x7 fltk_jpeg_idct_14x7 +#define jpeg_idct_15x15 fltk_jpeg_idct_15x15 +#define jpeg_idct_16x16 fltk_jpeg_idct_16x16 +#define jpeg_idct_16x8 fltk_jpeg_idct_16x8 +#define jpeg_idct_1x1 fltk_jpeg_idct_1x1 +#define jpeg_idct_1x2 fltk_jpeg_idct_1x2 +#define jpeg_idct_2x1 fltk_jpeg_idct_2x1 +#define jpeg_idct_2x2 fltk_jpeg_idct_2x2 +#define jpeg_idct_2x4 fltk_jpeg_idct_2x4 +#define jpeg_idct_3x3 fltk_jpeg_idct_3x3 +#define jpeg_idct_3x6 fltk_jpeg_idct_3x6 +#define jpeg_idct_4x2 fltk_jpeg_idct_4x2 +#define jpeg_idct_4x4 fltk_jpeg_idct_4x4 +#define jpeg_idct_4x8 fltk_jpeg_idct_4x8 +#define jpeg_idct_5x10 fltk_jpeg_idct_5x10 +#define jpeg_idct_5x5 fltk_jpeg_idct_5x5 +#define jpeg_idct_6x12 fltk_jpeg_idct_6x12 +#define jpeg_idct_6x3 fltk_jpeg_idct_6x3 +#define jpeg_idct_6x6 
fltk_jpeg_idct_6x6 +#define jpeg_idct_7x14 fltk_jpeg_idct_7x14 +#define jpeg_idct_7x7 fltk_jpeg_idct_7x7 +#define jpeg_idct_8x16 fltk_jpeg_idct_8x16 +#define jpeg_idct_8x4 fltk_jpeg_idct_8x4 +#define jpeg_idct_9x9 fltk_jpeg_idct_9x9 +#define jpeg_idct_float fltk_jpeg_idct_float +#define jpeg_idct_ifast fltk_jpeg_idct_ifast +#define jpeg_idct_islow fltk_jpeg_idct_islow +#define jpeg_input_complete fltk_jpeg_input_complete +#define jpeg_mem_available fltk_jpeg_mem_available +#define jpeg_mem_dest fltk_jpeg_mem_dest +#define jpeg_mem_init fltk_jpeg_mem_init +#define jpeg_mem_src fltk_jpeg_mem_src +#define jpeg_mem_term fltk_jpeg_mem_term +#define jpeg_natural_order fltk_jpeg_natural_order +#define jpeg_natural_order2 fltk_jpeg_natural_order2 +#define jpeg_natural_order3 fltk_jpeg_natural_order3 +#define jpeg_natural_order4 fltk_jpeg_natural_order4 +#define jpeg_natural_order5 fltk_jpeg_natural_order5 +#define jpeg_natural_order6 fltk_jpeg_natural_order6 +#define jpeg_natural_order7 fltk_jpeg_natural_order7 +#define jpeg_new_colormap fltk_jpeg_new_colormap +#define jpeg_open_backing_store fltk_jpeg_open_backing_store +#define jpeg_quality_scaling fltk_jpeg_quality_scaling +#define jpeg_read_coefficients fltk_jpeg_read_coefficients +#define jpeg_read_header fltk_jpeg_read_header +#define jpeg_read_raw_data fltk_jpeg_read_raw_data +#define jpeg_read_scanlines fltk_jpeg_read_scanlines +#define jpeg_resync_to_restart fltk_jpeg_resync_to_restart +#define jpeg_save_markers fltk_jpeg_save_markers +#define jpeg_set_colorspace fltk_jpeg_set_colorspace +#define jpeg_set_defaults fltk_jpeg_set_defaults +#define jpeg_set_linear_quality fltk_jpeg_set_linear_quality +#define jpeg_set_marker_processor fltk_jpeg_set_marker_processor +#define jpeg_set_quality fltk_jpeg_set_quality +#define jpeg_simple_progression fltk_jpeg_simple_progression +#define jpeg_start_compress fltk_jpeg_start_compress +#define jpeg_start_decompress fltk_jpeg_start_decompress +#define jpeg_start_output 
fltk_jpeg_start_output +#define jpeg_std_error fltk_jpeg_std_error +#define jpeg_std_huff_table fltk_jpeg_std_huff_table +#define jpeg_stdio_dest fltk_jpeg_stdio_dest +#define jpeg_stdio_src fltk_jpeg_stdio_src +#define jpeg_std_message_table fltk_jpeg_std_message_table +#define jpeg_suppress_tables fltk_jpeg_suppress_tables +#define jpeg_write_coefficients fltk_jpeg_write_coefficients +#define jpeg_write_marker fltk_jpeg_write_marker +#define jpeg_write_m_byte fltk_jpeg_write_m_byte +#define jpeg_write_m_header fltk_jpeg_write_m_header +#define jpeg_write_raw_data fltk_jpeg_write_raw_data +#define jpeg_write_scanlines fltk_jpeg_write_scanlines +#define jpeg_write_tables fltk_jpeg_write_tables +#define jround_up fltk_jround_up diff --git a/png/pngprefix.h b/png/pngprefix.h new file mode 100644 index 000000000..c6d171913 --- /dev/null +++ b/png/pngprefix.h @@ -0,0 +1,177 @@ +/* + * FLTK symbol prefixes for libpng + * This is a generated file: see README.bundled-libs.txt +*/ +#define png_sRGB_table fltk_png_sRGB_table +#define png_sRGB_base fltk_png_sRGB_base +#define png_sRGB_delta fltk_png_sRGB_delta +#define png_zstream_error fltk_png_zstream_error +#define png_free_buffer_list fltk_png_free_buffer_list +#define png_fixed fltk_png_fixed +#define png_user_version_check fltk_png_user_version_check +#define png_malloc_base fltk_png_malloc_base +#define png_malloc_array fltk_png_malloc_array +#define png_realloc_array fltk_png_realloc_array +#define png_create_png_struct fltk_png_create_png_struct +#define png_destroy_png_struct fltk_png_destroy_png_struct +#define png_free_jmpbuf fltk_png_free_jmpbuf +#define png_zalloc fltk_png_zalloc +#define png_zfree fltk_png_zfree +#define png_default_read_data fltk_png_default_read_data +#define png_push_fill_buffer fltk_png_push_fill_buffer +#define png_default_write_data fltk_png_default_write_data +#define png_default_flush fltk_png_default_flush +#define png_reset_crc fltk_png_reset_crc +#define png_write_data 
fltk_png_write_data +#define png_read_sig fltk_png_read_sig +#define png_read_chunk_header fltk_png_read_chunk_header +#define png_read_data fltk_png_read_data +#define png_crc_read fltk_png_crc_read +#define png_crc_finish fltk_png_crc_finish +#define png_crc_error fltk_png_crc_error +#define png_calculate_crc fltk_png_calculate_crc +#define png_flush fltk_png_flush +#define png_write_IHDR fltk_png_write_IHDR +#define png_write_PLTE fltk_png_write_PLTE +#define png_compress_IDAT fltk_png_compress_IDAT +#define png_write_IEND fltk_png_write_IEND +#define png_write_gAMA_fixed fltk_png_write_gAMA_fixed +#define png_write_sBIT fltk_png_write_sBIT +#define png_write_cHRM_fixed fltk_png_write_cHRM_fixed +#define png_write_sRGB fltk_png_write_sRGB +#define png_write_eXIf fltk_png_write_eXIf +#define png_write_iCCP fltk_png_write_iCCP +#define png_write_sPLT fltk_png_write_sPLT +#define png_write_tRNS fltk_png_write_tRNS +#define png_write_bKGD fltk_png_write_bKGD +#define png_write_hIST fltk_png_write_hIST +#define png_write_tEXt fltk_png_write_tEXt +#define png_write_zTXt fltk_png_write_zTXt +#define png_write_iTXt fltk_png_write_iTXt +#define png_set_text_2 fltk_png_set_text_2 +#define png_write_oFFs fltk_png_write_oFFs +#define png_write_pCAL fltk_png_write_pCAL +#define png_write_pHYs fltk_png_write_pHYs +#define png_write_tIME fltk_png_write_tIME +#define png_write_sCAL_s fltk_png_write_sCAL_s +#define png_write_finish_row fltk_png_write_finish_row +#define png_write_start_row fltk_png_write_start_row +#define png_combine_row fltk_png_combine_row +#define png_do_read_interlace fltk_png_do_read_interlace +#define png_do_write_interlace fltk_png_do_write_interlace +#define png_read_filter_row fltk_png_read_filter_row +#define png_read_filter_row_up_neon fltk_png_read_filter_row_up_neon +#define png_read_filter_row_sub3_neon fltk_png_read_filter_row_sub3_neon +#define png_read_filter_row_sub4_neon fltk_png_read_filter_row_sub4_neon +#define 
png_read_filter_row_avg3_neon fltk_png_read_filter_row_avg3_neon +#define png_read_filter_row_avg4_neon fltk_png_read_filter_row_avg4_neon +#define png_read_filter_row_paeth3_neon fltk_png_read_filter_row_paeth3_neon +#define png_read_filter_row_paeth4_neon fltk_png_read_filter_row_paeth4_neon +#define png_write_find_filter fltk_png_write_find_filter +#define png_read_IDAT_data fltk_png_read_IDAT_data +#define png_read_finish_IDAT fltk_png_read_finish_IDAT +#define png_read_finish_row fltk_png_read_finish_row +#define png_read_start_row fltk_png_read_start_row +#define png_zlib_inflate fltk_png_zlib_inflate +#define png_read_transform_info fltk_png_read_transform_info +#define png_do_strip_channel fltk_png_do_strip_channel +#define png_do_swap fltk_png_do_swap +#define png_do_packswap fltk_png_do_packswap +#define png_do_invert fltk_png_do_invert +#define png_do_bgr fltk_png_do_bgr +#define png_handle_IHDR fltk_png_handle_IHDR +#define png_handle_PLTE fltk_png_handle_PLTE +#define png_handle_IEND fltk_png_handle_IEND +#define png_handle_bKGD fltk_png_handle_bKGD +#define png_handle_cHRM fltk_png_handle_cHRM +#define png_handle_eXIf fltk_png_handle_eXIf +#define png_handle_gAMA fltk_png_handle_gAMA +#define png_handle_hIST fltk_png_handle_hIST +#define png_handle_iCCP fltk_png_handle_iCCP +#define png_handle_iTXt fltk_png_handle_iTXt +#define png_handle_oFFs fltk_png_handle_oFFs +#define png_handle_pCAL fltk_png_handle_pCAL +#define png_handle_pHYs fltk_png_handle_pHYs +#define png_handle_sBIT fltk_png_handle_sBIT +#define png_handle_sCAL fltk_png_handle_sCAL +#define png_handle_sPLT fltk_png_handle_sPLT +#define png_handle_sRGB fltk_png_handle_sRGB +#define png_handle_tEXt fltk_png_handle_tEXt +#define png_handle_tIME fltk_png_handle_tIME +#define png_handle_tRNS fltk_png_handle_tRNS +#define png_handle_zTXt fltk_png_handle_zTXt +#define png_check_chunk_name fltk_png_check_chunk_name +#define png_check_chunk_length fltk_png_check_chunk_length +#define 
png_handle_unknown fltk_png_handle_unknown +#define png_chunk_unknown_handling fltk_png_chunk_unknown_handling +#define png_do_read_transformations fltk_png_do_read_transformations +#define png_do_write_transformations fltk_png_do_write_transformations +#define png_init_read_transformations fltk_png_init_read_transformations +#define png_push_read_chunk fltk_png_push_read_chunk +#define png_push_read_sig fltk_png_push_read_sig +#define png_push_check_crc fltk_png_push_check_crc +#define png_push_save_buffer fltk_png_push_save_buffer +#define png_push_restore_buffer fltk_png_push_restore_buffer +#define png_push_read_IDAT fltk_png_push_read_IDAT +#define png_process_IDAT_data fltk_png_process_IDAT_data +#define png_push_process_row fltk_png_push_process_row +#define png_push_handle_unknown fltk_png_push_handle_unknown +#define png_push_have_info fltk_png_push_have_info +#define png_push_have_end fltk_png_push_have_end +#define png_push_have_row fltk_png_push_have_row +#define png_push_read_end fltk_png_push_read_end +#define png_process_some_data fltk_png_process_some_data +#define png_read_push_finish_row fltk_png_read_push_finish_row +#define png_push_handle_tEXt fltk_png_push_handle_tEXt +#define png_push_read_tEXt fltk_png_push_read_tEXt +#define png_push_handle_zTXt fltk_png_push_handle_zTXt +#define png_push_read_zTXt fltk_png_push_read_zTXt +#define png_push_handle_iTXt fltk_png_push_handle_iTXt +#define png_push_read_iTXt fltk_png_push_read_iTXt +#define png_colorspace_set_gamma fltk_png_colorspace_set_gamma +#define png_colorspace_sync_info fltk_png_colorspace_sync_info +#define png_colorspace_sync fltk_png_colorspace_sync +#define png_colorspace_set_chromaticities fltk_png_colorspace_set_chromaticities +#define png_colorspace_set_endpoints fltk_png_colorspace_set_endpoints +#define png_colorspace_set_sRGB fltk_png_colorspace_set_sRGB +#define png_colorspace_set_ICC fltk_png_colorspace_set_ICC +#define png_icc_check_length fltk_png_icc_check_length +#define 
png_icc_check_header fltk_png_icc_check_header +#define png_icc_check_tag_table fltk_png_icc_check_tag_table +#define png_icc_set_sRGB fltk_png_icc_set_sRGB +#define png_colorspace_set_rgb_coefficients fltk_png_colorspace_set_rgb_coefficients +#define png_check_IHDR fltk_png_check_IHDR +#define png_do_check_palette_indexes fltk_png_do_check_palette_indexes +#define png_fixed_error fltk_png_fixed_error +#define png_safecat fltk_png_safecat +#define png_format_number fltk_png_format_number +#define png_warning_parameter fltk_png_warning_parameter +#define png_warning_parameter_unsigned fltk_png_warning_parameter_unsigned +#define png_warning_parameter_signed fltk_png_warning_parameter_signed +#define png_formatted_warning fltk_png_formatted_warning +#define png_app_warning fltk_png_app_warning +#define png_app_error fltk_png_app_error +#define png_chunk_report fltk_png_chunk_report +#define png_ascii_from_fp fltk_png_ascii_from_fp +#define png_ascii_from_fixed fltk_png_ascii_from_fixed +#define png_check_fp_number fltk_png_check_fp_number +#define png_check_fp_string fltk_png_check_fp_string +#define png_muldiv fltk_png_muldiv +#define png_muldiv_warn fltk_png_muldiv_warn +#define png_reciprocal fltk_png_reciprocal +#define png_reciprocal2 fltk_png_reciprocal2 +#define png_gamma_significant fltk_png_gamma_significant +#define png_gamma_correct fltk_png_gamma_correct +#define png_gamma_16bit_correct fltk_png_gamma_16bit_correct +#define png_gamma_8bit_correct fltk_png_gamma_8bit_correct +#define png_destroy_gamma_table fltk_png_destroy_gamma_table +#define png_build_gamma_table fltk_png_build_gamma_table +#define png_safe_error fltk_png_safe_error +#define png_safe_warning fltk_png_safe_warning +#define png_safe_execute fltk_png_safe_execute +#define png_image_error fltk_png_image_error +#define png_init_filter_functions_neon fltk_png_init_filter_functions_neon +#define png_check_keyword fltk_png_check_keyword +#define png_riffle_palette_neon 
fltk_png_riffle_palette_neon +#define png_do_expand_palette_rgba8_neon fltk_png_do_expand_palette_rgba8_neon +#define png_do_expand_palette_rgb8_neon fltk_png_do_expand_palette_rgb8_neon diff --git a/png/powerpc/filter_vsx_intrinsics.c b/png/powerpc/filter_vsx_intrinsics.c new file mode 100644 index 000000000..01cf8800d --- /dev/null +++ b/png/powerpc/filter_vsx_intrinsics.c @@ -0,0 +1,768 @@ +/* filter_vsx_intrinsics.c - PowerPC optimised filter functions + * + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 2017 Glenn Randers-Pehrson + * Written by Vadim Barkov, 2017. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +#include +#include +#include "../pngpriv.h" + +#ifdef PNG_READ_SUPPORTED + +/* This code requires -maltivec and -mvsx on the command line: */ +#if PNG_POWERPC_VSX_IMPLEMENTATION == 1 /* intrinsics code from pngpriv.h */ + +#include + +#if PNG_POWERPC_VSX_OPT > 0 + +#ifndef __VSX__ +# error "This code requires VSX support (POWER7 and later). Please provide -mvsx compiler flag." +#endif + +#define vec_ld_unaligned(vec,data) vec = vec_vsx_ld(0,data) +#define vec_st_unaligned(vec,data) vec_vsx_st(vec,0,data) + + +/* Functions in this file look at most 3 pixels (a,b,c) to predict the 4th (d). + * They're positioned like this: + * prev: c b + * row: a d + * The Sub filter predicts d=a, Avg d=(a+b)/2, and Paeth predicts d to be + * whichever of a, b, or c is closest to p=a+b-c. 
+ * ( this is taken from ../intel/filter_sse2_intrinsics.c )
+ */
+/* Declares and initialises the locals shared by the filter routines:
+ *   i             - png_byte loop counter
+ *   rp            - cursor into row, initialised to row + offset
+ *   pp            - cursor into prev_row
+ *   unaligned_top - number of bytes from rp up to the next 16-byte boundary
+ *                   (0 when rp is already aligned); set to rowbytes instead
+ *                   when the row is not longer than that
+ *   istop         - row_info->rowbytes minus unaligned_top, or 0 when the
+ *                   whole row fits in the unaligned part
+ * The expansion contains both declarations and statements.
+ */
+#define vsx_declare_common_vars(row_info,row,prev_row,offset) \
+   png_byte i;\
+   png_bytep rp = row + offset;\
+   png_const_bytep pp = prev_row;\
+   size_t unaligned_top = 16 - (((size_t)rp % 16));\
+   size_t istop;\
+   if(unaligned_top == 16)\
+      unaligned_top = 0;\
+   istop = row_info->rowbytes;\
+   if((unaligned_top < istop))\
+      istop -= unaligned_top;\
+   else{\
+      unaligned_top = istop;\
+      istop = 0;\
+   }
+/* Reverses the "Up" filter in place: every byte of row has the byte at the
+ * same index of prev_row added to it, keeping the low 8 bits.  Bytes before
+ * the first 16-byte boundary of row and bytes after the last full group of
+ * 16 are handled by scalar loops; everything in between uses vector adds.
+ *   row_info - only rowbytes is read
+ *   row      - filtered bytes, overwritten with the reconstructed bytes
+ *   prev_row - reconstructed previous row, read only
+ */
+void png_read_filter_row_up_vsx(png_row_infop row_info, png_bytep row,
+                                png_const_bytep prev_row)
+{
+   vector unsigned char rp_vec;
+   vector unsigned char pp_vec;
+   vsx_declare_common_vars(row_info,row,prev_row,0)
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      rp_vec = vec_ld(0,rp); /* rp is 16-byte aligned here */
+      vec_ld_unaligned(pp_vec,pp); /* pp may have any alignment */
+
+      rp_vec = vec_add(rp_vec,pp_vec);
+
+      vec_st(rp_vec,0,rp);
+
+      pp += 16;
+      rp += 16;
+      istop -= 16;
+   }
+
+   if(istop > 0)
+   {
+      /* If byte count of row is not divisible by 16
+       * we will process remaining part as usual
+       */
+      for (i = 0; i < istop; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*pp++)) & 0xff);
+         rp++;
+      }
+}
+
+}
+/* vec_perm control vectors.  The code in this file passes VSX_CHAR_ZERO as
+ * the second vec_perm operand, so an index of 16 produces a zero byte.
+ * VSX_LEFTSHIFTED<k>_<bpp> copies pixel k-1 of the source vector into the
+ * byte slot of pixel k (bpp bytes per pixel) and zeroes every other byte.
+ */
+static const vector unsigned char VSX_LEFTSHIFTED1_4 = {16,16,16,16, 0, 1, 2, 3,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED2_4 = {16,16,16,16,16,16,16,16, 4, 5, 6, 7,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 8, 9,10,11};
+
+static const vector unsigned char VSX_LEFTSHIFTED1_3 = {16,16,16, 0, 1, 2,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED2_3 = {16,16,16,16,16,16, 3, 4, 5,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED3_3 =
{16,16,16,16,16,16,16,16,16, 6, 7, 8,16,16,16,16};
+static const vector unsigned char VSX_LEFTSHIFTED4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 9,10,11,16};
+/* VSX_NOT_SHIFTED<k>_<bpp> keeps pixel k of the source vector in its own
+ * byte slot and selects index 16 (the zero operand in this file's vec_perm
+ * calls) for every other byte.
+ */
+static const vector unsigned char VSX_NOT_SHIFTED1_4 = {16,16,16,16, 4, 5, 6, 7,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED2_4 = {16,16,16,16,16,16,16,16, 8, 9,10,11,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED3_4 = {16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,15};
+
+static const vector unsigned char VSX_NOT_SHIFTED1_3 = {16,16,16, 3, 4, 5,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED2_3 = {16,16,16,16,16,16, 6, 7, 8,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED3_3 = {16,16,16,16,16,16,16,16,16, 9,10,11,16,16,16,16};
+static const vector unsigned char VSX_NOT_SHIFTED4_3 = {16,16,16,16,16,16,16,16,16,16,16,16,12,13,14,16};
+/* All-zero vector, used as the second vec_perm operand so that control
+ * index 16 yields 0.
+ */
+static const vector unsigned char VSX_CHAR_ZERO = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+#ifdef __LITTLE_ENDIAN__
+/* Little-endian layouts.  VSX_CHAR_TO_SHORT<k>_<bpp> spreads the bpp bytes
+ * of pixel k over the first bpp 16-bit lanes, each followed by a zero byte.
+ * VSX_SHORT_TO_CHAR<k>_<bpp> gathers the even-numbered bytes of those lanes
+ * back into the byte slot of pixel k and zeroes the rest.
+ */
+static const vector unsigned char VSX_CHAR_TO_SHORT1_4 = { 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_4 = { 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_4 = {12,16,13,16,14,16,15,16,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_4 = {16,16,16,16, 0, 2, 4, 6,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_4 = {16,16,16,16,16,16,16,16, 0, 2, 4, 6,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4, 6};
+
+static const vector unsigned char VSX_CHAR_TO_SHORT1_3 = { 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_3 = { 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_3 = { 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT4_3 = {12,16,13,16,14,16,16,16,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_3 = {16,16,16, 0, 2, 4,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_3 = {16,16,16,16,16,16, 0, 2, 4,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR3_3 = {16,16,16,16,16,16,16,16,16, 0, 2, 4,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 0, 2, 4,16};
+
+#elif defined(__BIG_ENDIAN__)
+/* Big-endian counterparts: the zero byte comes first in each 16-bit lane
+ * and the odd-numbered bytes are gathered back.
+ */
+static const vector unsigned char VSX_CHAR_TO_SHORT1_4 = {16, 4,16, 5,16, 6,16, 7,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_4 = {16, 8,16, 9,16,10,16,11,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_4 = {16,12,16,13,16,14,16,15,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_4 = {16,16,16,16, 1, 3, 5, 7,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_4 = {16,16,16,16,16,16,16,16, 1, 3, 5, 7,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR3_4 = {16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5, 7};
+
+static const vector unsigned char VSX_CHAR_TO_SHORT1_3 = {16, 3,16, 4,16, 5,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT2_3 = {16, 6,16, 7,16, 8,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT3_3 = {16, 9,16,10,16,11,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_CHAR_TO_SHORT4_3 = {16,12,16,13,16,14,16,16,16,16,16,16,16,16,16,16};
+
+static const vector unsigned char VSX_SHORT_TO_CHAR1_3 = {16,16,16, 1, 3, 5,16,16,16,16,16,16,16,16,16,16};
+static const vector unsigned char VSX_SHORT_TO_CHAR2_3 = {16,16,16,16,16,16, 1, 3, 5,16,16,16,16,16,16,16};
+static const vector unsigned char
VSX_SHORT_TO_CHAR3_3 = {16,16,16,16,16,16,16,16,16, 1, 3, 5,16,16,16,16}; +static const vector unsigned char VSX_SHORT_TO_CHAR4_3 = {16,16,16,16,16,16,16,16,16,16,16,16, 1, 3, 5,16}; + +#endif + +#define vsx_char_to_short(vec,offset,bpp) (vector unsigned short)vec_perm((vec),VSX_CHAR_ZERO,VSX_CHAR_TO_SHORT##offset##_##bpp) +#define vsx_short_to_char(vec,offset,bpp) vec_perm(((vector unsigned char)(vec)),VSX_CHAR_ZERO,VSX_SHORT_TO_CHAR##offset##_##bpp) + +#ifdef PNG_USE_ABS +# define vsx_abs(number) abs(number) +#else +# define vsx_abs(number) (number > 0) ? (number) : -(number) +#endif + +void png_read_filter_row_sub4_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + png_byte bpp = 4; + + vector unsigned char rp_vec; + vector unsigned char part_vec; + + vsx_declare_common_vars(row_info,row,prev_row,bpp) + + PNG_UNUSED(pp) + + /* Altivec operations require 16-byte aligned data + * but input can be unaligned. So we calculate + * unaligned part as usual. + */ + for (i = 0; i < unaligned_top; i++) + { + *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff); + rp++; + } + + /* Using SIMD while we can */ + while( istop >= 16 ) + { + for(i=0;i < bpp ; i++) + { + *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff); + rp++; + } + rp -= bpp; + + rp_vec = vec_ld(0,rp); + part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4); + rp_vec = vec_add(rp_vec,part_vec); + + part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4); + rp_vec = vec_add(rp_vec,part_vec); + + part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4); + rp_vec = vec_add(rp_vec,part_vec); + + vec_st(rp_vec,0,rp); + + rp += 16; + istop -= 16; + } + + if(istop > 0) + for (i = 0; i < istop % 16; i++) + { + *rp = (png_byte)(((int)(*rp) + (int)(*(rp - bpp))) & 0xff); + rp++; + } + +} + +void png_read_filter_row_sub3_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + png_byte bpp = 3; + + vector unsigned char rp_vec; + vector unsigned char 
part_vec;
+   /* rp starts at row + bpp, so the first bpp bytes of row are not modified.
+    * NOTE(review): istop is derived from the full row_info->rowbytes, so the
+    * loops below advance rp by rowbytes bytes from row + bpp; confirm the
+    * row buffer has at least bpp bytes of slack after the row data.
+    */
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+
+   PNG_UNUSED(pp)
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      for(i=0;i < bpp ; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+         rp++;
+      }
+      rp -= bpp; /* back to the start of the 16-byte group */
+
+      rp_vec = vec_ld(0,rp);
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
+      rp_vec = vec_add(rp_vec,part_vec);
+
+      vec_st(rp_vec,0,rp);
+      rp += 15; /* bytes 0..14 are done; byte 15 follows below */
+      istop -= 16;
+
+      /* Since 16 % bpp = 16 % 3 = 1, last element of array must
+       * be proceeded manually
+       */
+      *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+      rp++;
+   }
+
+   if(istop > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) + (int)(*(rp-bpp))) & 0xff);
+         rp++;
+      }
+}
+/* Reverses the "Avg" filter for 4-byte pixels in place.  The first 4 bytes
+ * get half of the prev_row byte at the same index added; every later byte
+ * gets (prev_row byte + byte 4 positions to the left) / 2 added, using
+ * truncating integer division and keeping the low 8 bits of the sum.
+ *   row_info - only rowbytes is read
+ *   row      - filtered bytes, overwritten with the reconstructed bytes
+ *   prev_row - reconstructed previous row, read only
+ */
+void png_read_filter_row_avg4_vsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   png_byte bpp = 4;
+
+   vector unsigned char rp_vec;
+   vector unsigned char pp_vec;
+   vector unsigned char pp_part_vec;
+   vector unsigned char rp_part_vec;
+   vector unsigned char avg_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+   rp -= bpp; /* alignment was computed for row + bpp; start at row */
+   if(istop >= bpp)
+      istop -= bpp;
+
+   for (i = 0; i < bpp; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) +
+         ((int)(*pp++) / 2 )) & 0xff);
+
+      rp++;
+   }
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+ */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) +
+         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      for(i=0;i < bpp ; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) +
+            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+         rp++;
+      }
+      rp -= bpp; /* back to the start of the 16-byte group */
+      pp -= bpp;
+
+      vec_ld_unaligned(pp_vec,pp);
+      rp_vec = vec_ld(0,rp);
+      /* Each step below handles one pixel: rp_part_vec is the pixel to the
+       * left moved into the current slot, pp_part_vec the pixel above.
+       * vec_avg rounds up, so (a ^ b) & 1 is subtracted to get the
+       * truncating average the scalar code computes.  A step reads the
+       * result of the previous one, so the order matters.
+       */
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_4);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_4);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_4);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_4);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_4);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_4);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      vec_st(rp_vec,0,rp);
+
+      rp += 16;
+      pp += 16;
+      istop -= 16;
+   }
+
+   if(istop > 0)
+      for (i = 0; i < istop % 16; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) +
+            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+         rp++;
+      }
+}
+/* Reverses the "Avg" filter for 3-byte pixels in place.  The first 3 bytes
+ * get half of the prev_row byte at the same index added; every later byte
+ * gets (prev_row byte + byte 3 positions to the left) / 2 added, using
+ * truncating integer division and keeping the low 8 bits of the sum.
+ *   row_info - only rowbytes is read
+ *   row      - filtered bytes, overwritten with the reconstructed bytes
+ *   prev_row - reconstructed previous row, read only
+ */
+void png_read_filter_row_avg3_vsx(png_row_infop row_info, png_bytep row,
+                                  png_const_bytep prev_row)
+{
+   png_byte bpp = 3;
+
+   vector unsigned char rp_vec;
+   vector unsigned char pp_vec;
+   vector unsigned char pp_part_vec;
+   vector unsigned char rp_part_vec;
+   vector unsigned char avg_vec;
+
+   vsx_declare_common_vars(row_info,row,prev_row,bpp)
+   rp -= bpp; /* alignment was computed for row + bpp; start at row */
+   if(istop >= bpp)
+      istop -= bpp;
+
+   for (i = 0; i < bpp; i++)
+   {
+      *rp =
(png_byte)(((int)(*rp) +
+         ((int)(*pp++) / 2 )) & 0xff);
+
+      rp++;
+   }
+
+   /* Altivec operations require 16-byte aligned data
+    * but input can be unaligned. So we calculate
+    * unaligned part as usual.
+    */
+   for (i = 0; i < unaligned_top; i++)
+   {
+      *rp = (png_byte)(((int)(*rp) +
+         (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+      rp++;
+   }
+
+   /* Using SIMD while we can */
+   while( istop >= 16 )
+   {
+      for(i=0;i < bpp ; i++)
+      {
+         *rp = (png_byte)(((int)(*rp) +
+            (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff);
+
+         rp++;
+      }
+      rp -= bpp; /* back to the start of the 16-byte group */
+      pp -= bpp;
+
+      vec_ld_unaligned(pp_vec,pp);
+      rp_vec = vec_ld(0,rp);
+      /* Four dependent steps, one per 3-byte group at bytes 3..14.
+       * vec_avg rounds up, so (a ^ b) & 1 is subtracted to get the
+       * truncating average the scalar code computes.
+       */
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED1_3);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED1_3);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED2_3);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED2_3);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED3_3);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED3_3);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      rp_part_vec = vec_perm(rp_vec,VSX_CHAR_ZERO,VSX_LEFTSHIFTED4_3);
+      pp_part_vec = vec_perm(pp_vec,VSX_CHAR_ZERO,VSX_NOT_SHIFTED4_3);
+      avg_vec = vec_avg(rp_part_vec,pp_part_vec);
+      avg_vec = vec_sub(avg_vec, vec_and(vec_xor(rp_part_vec,pp_part_vec),vec_splat_u8(1)));
+      rp_vec = vec_add(rp_vec,avg_vec);
+
+      vec_st(rp_vec,0,rp);
+
+      rp += 15; /* bytes 0..14 are done; byte 15 follows below */
+      pp += 15;
+      istop -= 16;
+
+      /* Since 16 % bpp = 16 % 3 = 1, last element of array must
+       * be proceeded manually
+       */
+      *rp =
(png_byte)(((int)(*rp) + + (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff); + rp++; + } + + if(istop > 0) + for (i = 0; i < istop % 16; i++) + { + *rp = (png_byte)(((int)(*rp) + + (int)(*pp++ + *(rp-bpp)) / 2 ) & 0xff); + + rp++; + } +} + +/* Bytewise c ? t : e. */ +#define if_then_else(c,t,e) vec_sel(e,t,c) + +#define vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) {\ + c = *(pp - bpp);\ + a = *(rp - bpp);\ + b = *pp++;\ + p = b - c;\ + pc = a - c;\ + pa = vsx_abs(p);\ + pb = vsx_abs(pc);\ + pc = vsx_abs(p + pc);\ + if (pb < pa) pa = pb, a = b;\ + if (pc < pa) a = c;\ + a += *rp;\ + *rp++ = (png_byte)a;\ + } + +void png_read_filter_row_paeth4_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + png_byte bpp = 4; + + int a, b, c, pa, pb, pc, p; + vector unsigned char rp_vec; + vector unsigned char pp_vec; + vector unsigned short a_vec,b_vec,c_vec,nearest_vec; + vector signed short pa_vec,pb_vec,pc_vec,smallest_vec; + + vsx_declare_common_vars(row_info,row,prev_row,bpp) + rp -= bpp; + if(istop >= bpp) + istop -= bpp; + + /* Process the first pixel in the row completely (this is the same as 'up' + * because there is only one candidate predictor for the first row). 
+ */ + for(i = 0; i < bpp ; i++) + { + *rp = (png_byte)( *rp + *pp); + rp++; + pp++; + } + + for(i = 0; i < unaligned_top ; i++) /* scalar bytes ahead of the vector loop */ + { + vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) + } + + while( istop >= 16) + { + for(i = 0; i < bpp ; i++) /* first pixel of the chunk is reconstructed in scalar code */ + { + vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) + } + + rp -= bpp; + pp -= bpp; /* step back to the start of the chunk: vec_ld and vec_st below operate at rp */ + rp_vec = vec_ld(0,rp); + vec_ld_unaligned(pp_vec,pp); /* NOTE(review): vsx_char_to_short, vsx_short_to_char and the VSX_* permute masks are defined outside this view; each numbered step below appears to reconstruct one pixel of the chunk from the one before it - confirm */ + + a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_4),1,4); + b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED1_4),1,4); + c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_4),1,4); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); /* pa = b - c, pb = a - c, pc = pa + pb on 16-bit lanes: the same distances as vsx_paeth_process, before taking abs */ + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); + nearest_vec = if_then_else( + vec_cmpeq(pa_vec,smallest_vec), + a_vec, + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); /* ties pick a, then b, then c */ + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,4))); + + a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_4),2,4); + b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED2_4),2,4); + c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_4),2,4); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); + nearest_vec = if_then_else( + vec_cmpeq(pa_vec,smallest_vec), + a_vec, + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,4))); + + a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO ,
VSX_LEFTSHIFTED3_4),3,4); + b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED3_4),3,4); + c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_4),3,4); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); + nearest_vec = if_then_else( + vec_cmpeq(pa_vec,smallest_vec), + a_vec, + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,4))); + + vec_st(rp_vec,0,rp); /* write the reconstructed 16 bytes back to the row */ + + rp += 16; + pp += 16; + istop -= 16; + } + + if(istop > 0) + for (i = 0; i < istop % 16; i++) /* istop is below 16 here, so istop % 16 equals istop */ + { + vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) + } +} + /* Reverses the PNG Paeth filter in place on row for 3-byte pixels, using prev_row as the row above. The first pixel is reconstructed by adding the byte above; later bytes use the Paeth predictor, in scalar code for the lead-in and tail and in 16-byte chunks in between. */ +void png_read_filter_row_paeth3_vsx(png_row_infop row_info, png_bytep row, + png_const_bytep prev_row) +{ + png_byte bpp = 3; + + int a, b, c, pa, pb, pc, p; /* p is used inside vsx_paeth_process without being passed to it */ + vector unsigned char rp_vec; + vector unsigned char pp_vec; + vector unsigned short a_vec,b_vec,c_vec,nearest_vec; + vector signed short pa_vec,pb_vec,pc_vec,smallest_vec; + + vsx_declare_common_vars(row_info,row,prev_row,bpp) /* NOTE(review): macro defined outside this view; presumably declares rp, pp, istop, unaligned_top and i - confirm */ + rp -= bpp; + if(istop >= bpp) + istop -= bpp; + + /* Process the first pixel in the row completely (this is the same as 'up' + * because there is only one candidate predictor for the first row).
+ */ + for(i = 0; i < bpp ; i++) + { + *rp = (png_byte)( *rp + *pp); + rp++; + pp++; + } + + for(i = 0; i < unaligned_top ; i++) /* scalar bytes ahead of the vector loop */ + { + vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) + } + + while( istop >= 16) + { + for(i = 0; i < bpp ; i++) /* first pixel of the chunk is reconstructed in scalar code */ + { + vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) + } + + rp -= bpp; + pp -= bpp; /* step back to the start of the chunk: vec_ld and vec_st below operate at rp */ + rp_vec = vec_ld(0,rp); + vec_ld_unaligned(pp_vec,pp); /* NOTE(review): vsx_char_to_short, vsx_short_to_char and the VSX_* permute masks are defined outside this view; each numbered step below appears to reconstruct one pixel of the chunk from the one before it - confirm */ + + a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_3),1,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED1_3),1,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED1_3),1,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); /* pa = b - c, pb = a - c, pc = pa + pb on 16-bit lanes: the same distances as vsx_paeth_process, before taking abs */ + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); + nearest_vec = if_then_else( + vec_cmpeq(pa_vec,smallest_vec), + a_vec, + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); /* ties pick a, then b, then c */ + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,1,3))); + + a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_3),2,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED2_3),2,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED2_3),2,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); + nearest_vec = if_then_else( + vec_cmpeq(pa_vec,smallest_vec), + a_vec, + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,2,3))); + + a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO ,
VSX_LEFTSHIFTED3_3),3,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED3_3),3,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED3_3),3,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); + nearest_vec = if_then_else( + vec_cmpeq(pa_vec,smallest_vec), + a_vec, + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,3,3))); + + a_vec = vsx_char_to_short(vec_perm(rp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED4_3),4,3); + b_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_NOT_SHIFTED4_3),4,3); + c_vec = vsx_char_to_short(vec_perm(pp_vec , VSX_CHAR_ZERO , VSX_LEFTSHIFTED4_3),4,3); + pa_vec = (vector signed short) vec_sub(b_vec,c_vec); + pb_vec = (vector signed short) vec_sub(a_vec , c_vec); + pc_vec = vec_add(pa_vec,pb_vec); + pa_vec = vec_abs(pa_vec); + pb_vec = vec_abs(pb_vec); + pc_vec = vec_abs(pc_vec); + smallest_vec = vec_min(pc_vec, vec_min(pa_vec,pb_vec)); + nearest_vec = if_then_else( + vec_cmpeq(pa_vec,smallest_vec), + a_vec, + if_then_else( + vec_cmpeq(pb_vec,smallest_vec), + b_vec, + c_vec + ) + ); + rp_vec = vec_add(rp_vec,(vsx_short_to_char(nearest_vec,4,3))); + + vec_st(rp_vec,0,rp); /* write the reconstructed bytes back to the row */ + + rp += 15; + pp += 15; + istop -= 16; + + /* Since 16 % bpp = 16 % 3 = 1, last element of array must + * be processed manually + */ + vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) + } + + if(istop > 0) + for (i = 0; i < istop % 16; i++) /* istop is below 16 here, so istop % 16 equals istop */ + { + vsx_paeth_process(rp,pp,a,b,c,pa,pb,pc,bpp) + } +} + +#endif /* PNG_POWERPC_VSX_OPT > 0 */ +#endif /* PNG_POWERPC_VSX_IMPLEMENTATION == 1 (intrinsics) */ +#endif /* READ */ diff --git a/png/powerpc/powerpc_init.c b/png/powerpc/powerpc_init.c new file mode
100644 index 000000000..54426c558 --- /dev/null +++ b/png/powerpc/powerpc_init.c @@ -0,0 +1,126 @@ + +/* powerpc_init.c - POWERPC optimised filter functions + * + * Copyright (c) 2018 Cosmin Truta + * Copyright (c) 2017 Glenn Randers-Pehrson + * Written by Vadim Barkov, 2017. + * + * This code is released under the libpng license. + * For conditions of distribution and use, see the disclaimer + * and license in png.h + */ + +/* Below, after checking __linux__, various non-C90 POSIX 1003.1 functions are + * called. + */ +#define _POSIX_SOURCE 1 + +#include <stdio.h> /* NOTE(review): header name was missing here; restored as in upstream libpng - confirm */ +#include "../pngpriv.h" + +#ifdef PNG_READ_SUPPORTED + +#if PNG_POWERPC_VSX_OPT > 0 +#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED /* Do run-time checks */ +/* WARNING: it is strongly recommended that you do not build libpng with + * run-time checks for CPU features if at all possible. In the case of the PowerPC + * VSX instructions there is no processor-specific way of detecting the + * presence of the required support, therefore run-time detection is extremely + * OS specific. + * + * You may set the macro PNG_POWERPC_VSX_FILE to the file name of a file containing + * a fragment of C source code which defines the png_have_vsx function. There + * are a number of implementations in contrib/powerpc-vsx, but the only one that + * has partial support is contrib/powerpc-vsx/linux.c - a generic Linux + * implementation which reads /proc/cpuinfo.
+ */ +#ifndef PNG_POWERPC_VSX_FILE +# ifdef __linux__ +# define PNG_POWERPC_VSX_FILE "contrib/powerpc-vsx/linux_aux.c" +# endif +#endif + +#ifdef PNG_POWERPC_VSX_FILE + +#include <signal.h> /* for sig_atomic_t */ +static int png_have_vsx(png_structp png_ptr); +#include PNG_POWERPC_VSX_FILE + +#else /* PNG_POWERPC_VSX_FILE */ +# error "PNG_POWERPC_VSX_FILE undefined: no support for run-time POWERPC VSX checks" +#endif /* PNG_POWERPC_VSX_FILE */ +#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */ + /* Installs the VSX row-unfilter routines into pp->read_filter. With PNG_POWERPC_VSX_API_SUPPORTED the two-bit option field (pp->options >> PNG_POWERPC_VSX) & 3 decides: ON proceeds, UNSET defers to the run-time check when that is compiled in, anything else returns without touching pp. With only PNG_POWERPC_VSX_CHECK_SUPPORTED the cached result of png_have_vsx decides. The Up routine is installed for every bpp; Sub, Avg and Paeth only when bpp is 3 or 4. */ +void +png_init_filter_functions_vsx(png_structp pp, unsigned int bpp) +{ + /* The switch statement is compiled in for POWERPC_VSX_API, the call to + * png_have_vsx is compiled in for POWERPC_VSX_CHECK. If both are defined + * the check is only performed if the API has not set the PowerPC option on + * or off explicitly. In this case the check controls what happens. + */ + +#ifdef PNG_POWERPC_VSX_API_SUPPORTED + switch ((pp->options >> PNG_POWERPC_VSX) & 3) + { + case PNG_OPTION_UNSET: + /* Allow the run-time check to execute if it has been enabled - + * thus both API and CHECK can be turned on. If it isn't supported + * this case will fall through to the 'default' below, which just + * returns. + */ +#endif /* PNG_POWERPC_VSX_API_SUPPORTED */ +#ifdef PNG_POWERPC_VSX_CHECK_SUPPORTED + { + static volatile sig_atomic_t no_vsx = -1; /* not checked */ + + if (no_vsx < 0) /* probe once, then reuse the stored answer */ + no_vsx = !png_have_vsx(pp); + + if (no_vsx) + return; + } +#ifdef PNG_POWERPC_VSX_API_SUPPORTED + break; +#endif +#endif /* PNG_POWERPC_VSX_CHECK_SUPPORTED */ + +#ifdef PNG_POWERPC_VSX_API_SUPPORTED + default: /* OFF or INVALID */ + return; + + case PNG_OPTION_ON: + /* Option turned on */ + break; + } +#endif + + /* IMPORTANT: any new internal functions used here must be declared using + * PNG_INTERNAL_FUNCTION in ../pngpriv.h.
This is required so that the + * 'prefix' option to configure works: + * + * ./configure --with-libpng-prefix=foobar_ + * + * Verify you have got this right by running the above command, doing a build + * and examining pngprefix.h; it must contain a #define for every external + * function you add. (Notice that this happens automatically for the + * initialization function.) + */ + pp->read_filter[PNG_FILTER_VALUE_UP-1] = png_read_filter_row_up_vsx; /* installed for every bpp */ + + if (bpp == 3) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub3_vsx; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg3_vsx; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth3_vsx; + } + + else if (bpp == 4) + { + pp->read_filter[PNG_FILTER_VALUE_SUB-1] = png_read_filter_row_sub4_vsx; + pp->read_filter[PNG_FILTER_VALUE_AVG-1] = png_read_filter_row_avg4_vsx; + pp->read_filter[PNG_FILTER_VALUE_PAETH-1] = png_read_filter_row_paeth4_vsx; + } +} +#endif /* PNG_POWERPC_VSX_OPT > 0 */ +#endif /* READ */