 // Source/astc_toplevel.cpp - external/github.com/ARM-software/astc-encoder - Git at Google

 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
 // Copyright 2011-2020 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
 // of the License at:
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 // License for the specific language governing permissions and limitations
 // under the License.
 // ----------------------------------------------------------------------------

 /**
  * @brief Functions for codec library front-end.
  */

 #include "astc_codec_internals.h"

 #include <stdio.h>
 #include <stdlib.h>

 #ifndef WIN32
 	#include <sys/time.h>
 	#include <pthread.h>
 	#include <unistd.h>

 	/**
 	 * @brief Read the current time of day as a floating-point second count.
 	 *
 	 * @return The tv_sec field reported by gettimeofday() plus the tv_usec
 	 *         field scaled to seconds.
 	 */
 	double get_time()
 	{
 		struct timeval now;
 		gettimeofday(&now, NULL);

 		const double whole_seconds = (double)now.tv_sec;
 		const double fractional_seconds = (double)now.tv_usec * 1.0e-6;
 		return whole_seconds + fractional_seconds;
 	}


 	/**
 	 * @brief Delete a file by name (POSIX build).
 	 *
 	 * @param filename Path of the file to remove.
 	 *
 	 * @return Whatever unlink() returns for that path.
 	 */
 	int astc_codec_unlink(const char *filename)
 	{
 		const int status = unlink(filename);
 		return status;
 	}

 #else
 	// Define pthread-like functions in terms of Windows threading API
 	#define WIN32_LEAN_AND_MEAN
 	#include <windows.h>

 	typedef HANDLE pthread_t;
 	typedef int pthread_attr_t;

 	/**
 	 * @brief Minimal pthread_create() replacement built on CreateThread().
 	 *
 	 * @param thread      Receives the handle returned by CreateThread(); this is
 	 *                    NULL when thread creation failed.
 	 * @param attribs     Ignored; threads are always created with default settings.
 	 * @param threadfunc  Thread entry point; it is cast to LPTHREAD_START_ROUTINE.
 	 * @param thread_arg  Argument handed to the entry point.
 	 *
 	 * @return 0 on success, a non-zero error code if the thread could not be
 	 *         created (the GetLastError() value, or -1 if that value is 0).
 	 */
 	int pthread_create(pthread_t * thread, const pthread_attr_t * attribs, void *(*threadfunc) (void *), void *thread_arg)
 	{
 		(void) attribs;

 		HANDLE handle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) threadfunc, thread_arg, 0, NULL);

 		// Always store the handle, even when it is NULL, so that the caller
 		// never ends up holding an uninitialized pthread_t.
 		*thread = handle;

 		if (handle == NULL)
 		{
 			DWORD error_code = GetLastError();
 			return (error_code != 0) ? (int) error_code : -1;
 		}
 		return 0;
 	}

 	/**
 	 * @brief Minimal pthread_join() replacement: wait for a thread and release
 	 *        its handle.
 	 *
 	 * @param thread Handle previously produced by pthread_create().
 	 * @param value  Ignored; the thread's return value is not retrieved.
 	 *
 	 * @return 0 if the wait was satisfied and the handle was closed, -1 otherwise.
 	 */
 	int pthread_join(pthread_t thread, void **value)
 	{
 		(void) value;

 		DWORD wait_status = WaitForSingleObject(thread, INFINITE);

 		// The handle is closed regardless of the wait outcome, as before.
 		BOOL closed = CloseHandle(thread);

 		return (wait_status == WAIT_OBJECT_0 && closed) ? 0 : -1;
 	}

 	double get_time()
 	{
 		FILETIME tv;
 		GetSystemTimeAsFileTime(&tv);

 		unsigned __int64 ticks = tv.dwHighDateTime;
 		ticks = (ticks << 32) | tv.dwLowDateTime;

 		return ((double)ticks) / 1.0e7;
 	}

 	// Define an unlink() function in terms of the Win32 DeleteFile function.
 	int astc_codec_unlink(const char *filename)
 	{
 		BOOL res = DeleteFileA(filename);
 		return (res ? 0 : -1);
 	}
 #endif

 #ifdef DEBUG_CAPTURE_NAN
 	#ifndef _GNU_SOURCE
 		#define _GNU_SOURCE
 	#endif

 	#include <fenv.h>
 #endif

 // 2048-entry histogram defined in another translation unit; it is not
 // referenced in the part of the file reviewed here.
 extern int block_mode_histogram[2048];

 #ifdef DEBUG_PRINT_DIAGNOSTICS
 	// Diagnostics state, present only in DEBUG_PRINT_DIAGNOSTICS builds.
 	// When diagnostics_tile is negative the encoder threads process every
 	// block with print_diagnostics set to 0; otherwise a thread only
 	// processes the block whose per-thread index equals diagnostics_tile,
 	// with print_diagnostics set to 1.
 	int print_diagnostics = 0;
 	int diagnostics_tile = -1;
 #endif

 // When non-zero, the encoder threads do not print the progress counter.
 // NOTE(review): presumably also enables per-tile error output elsewhere - confirm.
 int print_tile_errors = 0;

 // When non-zero, the encoder threads do not print the progress counter.
 // NOTE(review): presumably also enables statistics output elsewhere - confirm.
 int print_statistics = 0;

 // A thread prints the progress counter only when its own processed-block
 // count is a multiple of this value.
 int progress_counter_divider = 1;

 // NOTE(review): not referenced in the reviewed part of the file; presumably
 // set from the -forcehdr_* command-line options - confirm.
 int rgb_force_use_of_hdr = 0;
 int alpha_force_use_of_hdr = 0;

 // Timestamps taken with get_time(). start_time is set at the top of
 // astc_main() and end_coding_time is set in store_astc_file() once encoding
 // has finished; the other two are not assigned in the reviewed part of the file.
 static double start_time;
 static double end_time;
 static double start_coding_time;
 static double end_coding_time;

 // code to discover the number of logical CPUs available.
 #if defined(__APPLE__)
 	#define _DARWIN_C_SOURCE
 	#include <sys/types.h>
 	#include <sys/sysctl.h>
 #endif

 #if defined(_WIN32) || defined(__CYGWIN__)
 	#include <windows.h>
 #else
 	#include <unistd.h>
 #endif

 /**
  * @brief Determine how many logical CPUs are available to this process.
  *
  * - Linux: counts the CPUs set in the process affinity mask.
  * - Windows / Cygwin: uses SYSTEM_INFO::dwNumberOfProcessors.
  * - macOS: queries the CTL_HW / HW_AVAILCPU sysctl.
  * - Anything else: reports a single CPU.
  *
  * @return The detected CPU count; always at least 1, including when the
  *         platform query fails.
  */
 unsigned get_number_of_cpus(void)
 {
 	unsigned n_cpus = 1;

 	#ifdef __linux__
 		cpu_set_t mask;
 		CPU_ZERO(&mask);
 		n_cpus = 0;
 		// Only trust the mask if the query succeeded; on failure n_cpus stays
 		// 0 and is raised to 1 below.
 		if (sched_getaffinity(getpid(), sizeof(mask), &mask) == 0)
 		{
 			for (unsigned i = 0; i < CPU_SETSIZE; ++i)
 			{
 				if (CPU_ISSET(i, &mask))
 					n_cpus++;
 			}
 		}

 	#elif defined (_WIN32) || defined(__CYGWIN__)
 		SYSTEM_INFO sysinfo;
 		GetSystemInfo(&sysinfo);
 		n_cpus = sysinfo.dwNumberOfProcessors;

 	#elif defined(__APPLE__)
 		int mib[2] = { CTL_HW, HW_AVAILCPU };
 		int available = 0;
 		// The length passed to sysctl() must describe the real size of the
 		// output buffer, otherwise the kernel is told it may write past it.
 		size_t length = sizeof(available);
 		if (sysctl(mib, 2, &available, &length, NULL, 0) == 0 && available > 0)
 			n_cpus = (unsigned) available;
 	#endif

 	// Never report zero CPUs; callers use this value as a thread count.
 	if (n_cpus == 0)
 		n_cpus = 1;

 	return n_cpus;
 }

 /**
  * @brief Report an internal error location and terminate the process.
  *
  * @param filename Source file name to print.
  * @param linenum  Source line number to print.
  *
  * Writes one line to standard output and then calls exit(1); it never returns.
  */
 NORETURN void astc_codec_internal_error(const char *filename, int linenum)
 {
 	fprintf(stdout, "Internal error: File=%s Line=%d\n", filename, linenum);
 	exit(1);
 }

 #define MAGIC_FILE_CONSTANT 0x5CA1AB13

 /**
  * @brief Header stored at the start of an .astc file, as read by
  *        load_astc_file() and written by store_astc_file().
  *
  * All multi-byte quantities are stored least-significant byte first.
  */
 struct astc_header
 {
 	uint8_t magic[4];			// MAGIC_FILE_CONSTANT, least-significant byte first
 	uint8_t blockdim_x;			// block dimensions along X, Y and Z
 	uint8_t blockdim_y;
 	uint8_t blockdim_z;
 	uint8_t xsize[3];			// x-size = xsize[0] + 256 * xsize[1] + 65536 * xsize[2]
 	uint8_t ysize[3];			// x-size, y-size and z-size are given in texels;
 	uint8_t zsize[3];			// block count is inferred
 };

 // When non-zero, the "blocks to process" banner and the running progress
 // counter are not printed.
 int suppress_progress_counter = 0;
 // NOTE(review): not referenced in the reviewed part of the file; presumably
 // selects sRGB conversion elsewhere - confirm.
 int perform_srgb_transform = 0;

 /**
  * @brief Load an .astc file and decompress it into a newly allocated image.
  *
  * The file is an astc_header followed by one 16-byte compressed block per
  * block position, stored with X varying fastest, then Y, then Z. Every
  * failure (cannot open, short read, wrong magic, unsupported block size,
  * zero or unrepresentably large dimensions, out of memory) prints a message
  * to standard output and terminates the process with exit(1).
  *
  * @param filename     Path of the file to read.
  * @param bitness      Forwarded to allocate_image() for the result image.
  * @param decode_mode  Forwarded to decompress_symbolic_block().
  * @param swz_decode   Swizzle forwarded to write_imageblock().
  *
  * @return The image obtained from allocate_image(), filled with the decoded
  *         blocks.
  */
 astc_codec_image *load_astc_file(const char *filename, int bitness, astc_decode_mode decode_mode, swizzlepattern swz_decode)
 {
 	int x, y, z;
 	FILE *f = fopen(filename, "rb");
 	if (!f)
 	{
 		printf("Failed to open file %s\n", filename);
 		exit(1);
 	}
 	astc_header hdr;
 	size_t hdr_bytes_read = fread(&hdr, 1, sizeof(astc_header), f);
 	if (hdr_bytes_read != sizeof(astc_header))
 	{
 		fclose(f);
 		printf("Failed to read file %s\n", filename);
 		exit(1);
 	}

 	// The magic number is stored least-significant byte first.
 	uint32_t magicval = hdr.magic[0] + 256 * (uint32_t) (hdr.magic[1]) + 65536 * (uint32_t) (hdr.magic[2]) + 16777216 * (uint32_t) (hdr.magic[3]);

 	if (magicval != MAGIC_FILE_CONSTANT)
 	{
 		fclose(f);
 		printf("File %s not recognized\n", filename);
 		exit(1);
 	}

 	int xdim = hdr.blockdim_x;
 	int ydim = hdr.blockdim_y;
 	int zdim = hdr.blockdim_z;

 	// Reject the file only if the block size is neither an acceptable 3D size
 	// (every dimension in 3..6) nor an acceptable 2D size (X and Y taken from
 	// {4,5,6,8,10,12} with Z equal to 1).
 	if ( (xdim < 3 || xdim > 6 || ydim < 3 || ydim > 6 || zdim < 3 || zdim > 6) &&
 	     (xdim < 4 || xdim == 7 || xdim == 9 || xdim == 11 || xdim > 12 ||
 	      ydim < 4 || ydim == 7 || ydim == 9 || ydim == 11 || ydim > 12 || zdim != 1) )
 	{
 		fclose(f);
 		printf("File %s not recognized %d %d %d\n", filename, xdim, ydim, zdim);
 		exit(1);
 	}

 	// Image dimensions are 24-bit values, least-significant byte first.
 	int xsize = hdr.xsize[0] + 256 * hdr.xsize[1] + 65536 * hdr.xsize[2];
 	int ysize = hdr.ysize[0] + 256 * hdr.ysize[1] + 65536 * hdr.ysize[2];
 	int zsize = hdr.zsize[0] + 256 * hdr.zsize[1] + 65536 * hdr.zsize[2];

 	if (xsize == 0 || ysize == 0 || zsize == 0)
 	{
 		fclose(f);
 		printf("File %s has zero dimension %d %d %d\n", filename, xsize, ysize, zsize);
 		exit(1);
 	}

 	// Number of blocks along each axis, rounding partial blocks up.
 	int xblocks = (xsize + xdim - 1) / xdim;
 	int yblocks = (ysize + ydim - 1) / ydim;
 	int zblocks = (zsize + zdim - 1) / zdim;

 	// The dimensions come straight from the file, so the payload size is
 	// computed in size_t with explicit overflow checks; doing this in int
 	// could wrap around and under-allocate the buffer. All three counts are
 	// at least 1 here, so the divisions below are safe.
 	const size_t size_limit = (size_t) -1;
 	const size_t nx = (size_t) xblocks;
 	const size_t ny = (size_t) yblocks;
 	const size_t nz = (size_t) zblocks;
 	if (ny > size_limit / nx || nz > size_limit / (nx * ny) || nx * ny * nz > size_limit / 16)
 	{
 		fclose(f);
 		printf("File %s has unsupported dimensions %d %d %d\n", filename, xsize, ysize, zsize);
 		exit(1);
 	}
 	size_t bytes_to_read = nx * ny * nz * 16;

 	uint8_t *buffer = (uint8_t *) malloc(bytes_to_read);
 	if (!buffer)
 	{
 		fclose(f);
 		printf("Ran out of memory\n");
 		exit(1);
 	}
 	size_t bytes_read = fread(buffer, 1, bytes_to_read, f);
 	fclose(f);
 	if (bytes_read != bytes_to_read)
 	{
 		free(buffer);
 		printf("Failed to read file %s\n", filename);
 		exit(1);
 	}

 	astc_codec_image *img = allocate_image(bitness, xsize, ysize, zsize, 0);
 	initialize_image(img);

 	imageblock pb;
 	for (z = 0; z < zblocks; z++)
 		for (y = 0; y < yblocks; y++)
 			for (x = 0; x < xblocks; x++)
 			{
 				// Byte offset of block (x, y, z): 16 bytes per block, X fastest.
 				size_t offset = ((((size_t) z * ny + (size_t) y) * nx) + (size_t) x) * 16;
 				uint8_t *bp = buffer + offset;
 				physical_compressed_block pcb = *(physical_compressed_block *) bp;
 				symbolic_compressed_block scb;
 				physical_to_symbolic(xdim, ydim, zdim, pcb, &scb);
 				decompress_symbolic_block(decode_mode, xdim, ydim, zdim, x * xdim, y * ydim, z * zdim, &scb, &pb);
 				write_imageblock(img, &pb, xdim, ydim, zdim, x * xdim, y * ydim, z * zdim, swz_decode);
 			}

 	free(buffer);

 	return img;
 }

 /**
  * @brief Per-thread work description handed to encode_astc_image_threadfunc().
  *
  * encode_astc_image() fills one instance per worker; all instances share the
  * same images, buffer and bookkeeping arrays and differ only in thread_id.
  */
 struct encode_astc_image_info
 {
 	int xdim;							// block dimensions
 	int ydim;
 	int zdim;
 	const error_weighting_params *ewp;	// forwarded to compress_symbolic_block()
 	uint8_t *buffer;					// destination for 16-byte physical blocks (written when pack_and_unpack is 0)
 	int *counters;						// shared array; each thread increments counters[thread_id] per processed block
 	int pack_and_unpack;				// non-zero: decompress each block into output_image instead of storing it in buffer
 	int thread_id;						// index of this worker within the shared arrays
 	int threadcount;					// total number of workers; also the stride between blocks handled by one worker
 	astc_decode_mode decode_mode;		// forwarded to the compress/decompress calls
 	swizzlepattern swz_encode;			// swizzle used by fetch_imageblock()
 	swizzlepattern swz_decode;			// swizzle used by write_imageblock() (pack_and_unpack only)
 	int *threads_completed;				// shared array; each thread sets its own entry to 1 just before returning
 	const astc_codec_image *input_image;	// image being compressed
 	astc_codec_image *output_image;		// receives decompressed blocks (pack_and_unpack only)
 };

 void *encode_astc_image_threadfunc(void *vblk)
 {
 	const encode_astc_image_info *blk = (const encode_astc_image_info *)vblk;
 	int xdim = blk->xdim;
 	int ydim = blk->ydim;
 	int zdim = blk->zdim;
 	uint8_t *buffer = blk->buffer;
 	const error_weighting_params *ewp = blk->ewp;
 	int thread_id = blk->thread_id;
 	int threadcount = blk->threadcount;
 	int *counters = blk->counters;
 	int pack_and_unpack = blk->pack_and_unpack;
 	astc_decode_mode decode_mode = blk->decode_mode;
 	swizzlepattern swz_encode = blk->swz_encode;
 	swizzlepattern swz_decode = blk->swz_decode;
 	int *threads_completed = blk->threads_completed;
 	const astc_codec_image *input_image = blk->input_image;
 	astc_codec_image *output_image = blk->output_image;

 	imageblock pb;
 	int ctr = thread_id;
 	int pctr = 0;

 	int x, y, z, i;
 	int xsize = input_image->xsize;
 	int ysize = input_image->ysize;
 	int zsize = input_image->zsize;
 	int xblocks = (xsize + xdim - 1) / xdim;
 	int yblocks = (ysize + ydim - 1) / ydim;
 	int zblocks = (zsize + zdim - 1) / zdim;

 	int owns_progress_counter = 0;

 	//allocate memory for temporary buffers
 	compress_symbolic_block_buffers temp_buffers;
 	temp_buffers.ewb = new error_weight_block;
 	temp_buffers.ewbo = new error_weight_block_orig;
 	temp_buffers.tempblocks = new symbolic_compressed_block[4];
 	temp_buffers.temp = new imageblock;
 	temp_buffers.planes2 = new compress_fixed_partition_buffers;
 	temp_buffers.planes2->ei1 = new endpoints_and_weights;
 	temp_buffers.planes2->ei2 = new endpoints_and_weights;
 	temp_buffers.planes2->eix1 = new endpoints_and_weights[MAX_DECIMATION_MODES];
 	temp_buffers.planes2->eix2 = new endpoints_and_weights[MAX_DECIMATION_MODES];
 	temp_buffers.planes2->decimated_quantized_weights = new float[2 * MAX_DECIMATION_MODES * MAX_WEIGHTS_PER_BLOCK];
 	temp_buffers.planes2->decimated_weights = new float[2 * MAX_DECIMATION_MODES * MAX_WEIGHTS_PER_BLOCK];
 	temp_buffers.planes2->flt_quantized_decimated_quantized_weights = new float[2 * MAX_WEIGHT_MODES * MAX_WEIGHTS_PER_BLOCK];
 	temp_buffers.planes2->u8_quantized_decimated_quantized_weights = new uint8_t[2 * MAX_WEIGHT_MODES * MAX_WEIGHTS_PER_BLOCK];
 	temp_buffers.plane1 = temp_buffers.planes2;

 	for (z = 0; z < zblocks; z++)
 	{
 		for (y = 0; y < yblocks; y++)
 		{
 			for (x = 0; x < xblocks; x++)
 			{
 				if (ctr == 0)
 				{
 					int offset = ((z * yblocks + y) * xblocks + x) * 16;
 					uint8_t *bp = buffer + offset;
 				#ifdef DEBUG_PRINT_DIAGNOSTICS
 					if (diagnostics_tile < 0 || diagnostics_tile == pctr)
 					{
 						print_diagnostics = (diagnostics_tile == pctr) ? 1 : 0;
 				#endif
 						fetch_imageblock(input_image, &pb, xdim, ydim, zdim, x * xdim, y * ydim, z * zdim, swz_encode);
 						symbolic_compressed_block scb;
 						compress_symbolic_block(input_image, decode_mode, xdim, ydim, zdim, ewp, &pb, &scb, &temp_buffers);
 						if (pack_and_unpack)
 						{
 							decompress_symbolic_block(decode_mode, xdim, ydim, zdim, x * xdim, y * ydim, z * zdim, &scb, &pb);
 							write_imageblock(output_image, &pb, xdim, ydim, zdim, x * xdim, y * ydim, z * zdim, swz_decode);
 						}
 						else
 						{
 							physical_compressed_block pcb;
 							pcb = symbolic_to_physical(xdim, ydim, zdim, &scb);
 							*(physical_compressed_block *) bp = pcb;
 						}
 				#ifdef DEBUG_PRINT_DIAGNOSTICS
 					}
 				#endif

 					counters[thread_id]++;
 					ctr = threadcount - 1;

 					pctr++;

 					// routine to print the progress counter.
 					if (suppress_progress_counter == 0 && (pctr % progress_counter_divider) == 0 && print_tile_errors == 0 && print_statistics == 0)
 					{
 						int do_print = 1;
 						// the current thread has the responsibility for printing the progress counter
 						// if every previous thread has completed. Also, if we have ever received the
 						// responsibility to print the progress counter, we are going to keep it
 						// until the thread is completed.
 						if (!owns_progress_counter)
 						{
 							for (i = thread_id - 1; i >= 0; i--)
 							{
 								if (threads_completed[i] == 0)
 								{
 									do_print = 0;
 									break;
 								}
 							}
 						}
 						if (do_print)
 						{
 							owns_progress_counter = 1;
 							int summa = 0;
 							for (i = 0; i < threadcount; i++)
 								summa += counters[i];
 							printf("\r%d", summa);
 							fflush(stdout);
 						}
 					}
 				}
 				else
 					ctr--;
 			}
 		}
 	}

 	delete[] temp_buffers.planes2->decimated_quantized_weights;
 	delete[] temp_buffers.planes2->decimated_weights;
 	delete[] temp_buffers.planes2->flt_quantized_decimated_quantized_weights;
 	delete[] temp_buffers.planes2->u8_quantized_decimated_quantized_weights;
 	delete[] temp_buffers.planes2->eix1;
 	delete[] temp_buffers.planes2->eix2;
 	delete   temp_buffers.planes2->ei1;
 	delete   temp_buffers.planes2->ei2;
 	delete   temp_buffers.planes2;
 	delete[] temp_buffers.tempblocks;
 	delete   temp_buffers.temp;
 	delete   temp_buffers.ewbo;
 	delete   temp_buffers.ewb;

 	threads_completed[thread_id] = 1;
 	return NULL;
 }

 /**
  * @brief Compress an image, splitting the blocks across worker threads.
  *
  * With pack_and_unpack set to 0 the compressed blocks are written to buffer;
  * otherwise each block is decompressed again into output_image.
  *
  * @param input_image      Image to compress.
  * @param output_image     Destination image for pack-and-unpack mode.
  * @param xdim             Block dimension along X.
  * @param ydim             Block dimension along Y.
  * @param zdim             Block dimension along Z.
  * @param ewp              Error weighting parameters for the compressor.
  * @param decode_mode      Decode mode forwarded to the workers.
  * @param swz_encode       Swizzle applied when fetching input texels.
  * @param swz_decode       Swizzle applied when writing decompressed texels.
  * @param buffer           Destination for compressed blocks (16 bytes each).
  * @param pack_and_unpack  Selects between the two output modes above.
  * @param threadcount      Number of workers; values below 1 are treated as 1.
  */
 void encode_astc_image(const astc_codec_image * input_image,
 					   astc_codec_image * output_image,
 					   int xdim,
 					   int ydim,
 					   int zdim,
 					   const error_weighting_params * ewp, astc_decode_mode decode_mode, swizzlepattern swz_encode, swizzlepattern swz_decode, uint8_t * buffer, int pack_and_unpack, int threadcount)
 {
 	// A thread count below 1 would otherwise leave the image unprocessed (0)
 	// or request negative-sized arrays; fall back to single-threaded operation.
 	if (threadcount < 1)
 		threadcount = 1;

 	int i;
 	int *counters = new int[threadcount];
 	int *threads_completed = new int[threadcount];

 	// before entering into the multi-threaded routine, ensure that the block size descriptors
 	// and the partition table descriptors needed actually exist.
 	get_block_size_descriptor(xdim, ydim, zdim);
 	get_partition_table(xdim, ydim, zdim, 0);

 	encode_astc_image_info *ai = new encode_astc_image_info[threadcount];
 	for (i = 0; i < threadcount; i++)
 	{
 		ai[i].xdim = xdim;
 		ai[i].ydim = ydim;
 		ai[i].zdim = zdim;
 		ai[i].buffer = buffer;
 		ai[i].ewp = ewp;
 		ai[i].counters = counters;
 		ai[i].pack_and_unpack = pack_and_unpack;
 		ai[i].thread_id = i;
 		ai[i].threadcount = threadcount;
 		ai[i].decode_mode = decode_mode;
 		ai[i].swz_encode = swz_encode;
 		ai[i].swz_decode = swz_decode;
 		ai[i].threads_completed = threads_completed;
 		ai[i].input_image = input_image;
 		ai[i].output_image = output_image;
 		counters[i] = 0;
 		threads_completed[i] = 0;
 	}

 	if (threadcount == 1)
 		encode_astc_image_threadfunc(&ai[0]);
 	else
 	{
 		pthread_t *threads = new pthread_t[threadcount];
 		int *thread_started = new int[threadcount];

 		for (i = 0; i < threadcount; i++)
 			thread_started[i] = (pthread_create(&(threads[i]), NULL, encode_astc_image_threadfunc, (void *)(&(ai[i]))) == 0);

 		// A worker that could not be started would leave its share of the
 		// blocks unencoded, so run that share on the calling thread instead.
 		for (i = 0; i < threadcount; i++)
 		{
 			if (!thread_started[i])
 				encode_astc_image_threadfunc(&ai[i]);
 		}

 		// Only join threads that actually exist.
 		for (i = 0; i < threadcount; i++)
 		{
 			if (thread_started[i])
 				pthread_join(threads[i], NULL);
 		}

 		delete[]thread_started;
 		delete[]threads;
 	}

 	delete[]ai;
 	delete[]counters;
 	delete[]threads_completed;
 }

 /**
  * @brief Compress an image and write it to disk as an .astc file.
  *
  * The output is an astc_header followed by one 16-byte block per block
  * position. Running out of memory, failing to open the output file or
  * failing to write it completely prints a message to standard output and
  * terminates the process with exit(1).
  *
  * @param input_image  Image to compress.
  * @param filename     Path of the file to create.
  * @param xdim         Block dimension along X.
  * @param ydim         Block dimension along Y.
  * @param zdim         Block dimension along Z.
  * @param ewp          Error weighting parameters for the compressor.
  * @param decode_mode  Decode mode forwarded to encode_astc_image().
  * @param swz_encode   Swizzle applied when fetching input texels.
  * @param threadcount  Number of worker threads to use.
  */
 void store_astc_file(const astc_codec_image * input_image,
 					 const char *filename, int xdim, int ydim, int zdim, const error_weighting_params * ewp, astc_decode_mode decode_mode, swizzlepattern swz_encode, int threadcount)
 {
 	int xsize = input_image->xsize;
 	int ysize = input_image->ysize;
 	int zsize = input_image->zsize;

 	// Number of blocks along each axis, rounding partial blocks up.
 	int xblocks = (xsize + xdim - 1) / xdim;
 	int yblocks = (ysize + ydim - 1) / ydim;
 	int zblocks = (zsize + zdim - 1) / zdim;

 	// Sized in size_t so that large images cannot overflow int arithmetic.
 	size_t buffer_size = (size_t) xblocks * (size_t) yblocks * (size_t) zblocks * 16;

 	uint8_t *buffer = (uint8_t *) malloc(buffer_size);
 	if (!buffer)
 	{
 		printf("Ran out of memory\n");
 		exit(1);
 	}

 	if (!suppress_progress_counter)
 		printf("%d blocks to process ..\n", xblocks * yblocks * zblocks);

 	// No output image is supplied and pack_and_unpack is 0, so the blocks are
 	// stored in buffer; swz_encode is simply passed for both swizzle arguments.
 	encode_astc_image(input_image, NULL, xdim, ydim, zdim, ewp, decode_mode, swz_encode, swz_encode, buffer, 0, threadcount);

 	end_coding_time = get_time();

 	// All header fields are written least-significant byte first.
 	astc_header hdr;
 	hdr.magic[0] = MAGIC_FILE_CONSTANT & 0xFF;
 	hdr.magic[1] = (MAGIC_FILE_CONSTANT >> 8) & 0xFF;
 	hdr.magic[2] = (MAGIC_FILE_CONSTANT >> 16) & 0xFF;
 	hdr.magic[3] = (MAGIC_FILE_CONSTANT >> 24) & 0xFF;
 	hdr.blockdim_x = xdim;
 	hdr.blockdim_y = ydim;
 	hdr.blockdim_z = zdim;
 	hdr.xsize[0] = xsize & 0xFF;
 	hdr.xsize[1] = (xsize >> 8) & 0xFF;
 	hdr.xsize[2] = (xsize >> 16) & 0xFF;
 	hdr.ysize[0] = ysize & 0xFF;
 	hdr.ysize[1] = (ysize >> 8) & 0xFF;
 	hdr.ysize[2] = (ysize >> 16) & 0xFF;
 	hdr.zsize[0] = zsize & 0xFF;
 	hdr.zsize[1] = (zsize >> 8) & 0xFF;
 	hdr.zsize[2] = (zsize >> 16) & 0xFF;

 	FILE *wf = fopen(filename, "wb");
 	if (!wf)
 	{
 		free(buffer);
 		printf("Failed to open file %s\n", filename);
 		exit(1);
 	}

 	size_t hdr_bytes_written = fwrite(&hdr, 1, sizeof(astc_header), wf);
 	size_t data_bytes_written = fwrite(buffer, 1, buffer_size, wf);
 	// fclose() flushes buffered data, so its result is part of the write check.
 	int close_status = fclose(wf);
 	free(buffer);

 	if (hdr_bytes_written != sizeof(astc_header) || data_bytes_written != buffer_size || close_status != 0)
 	{
 		printf("Failed to write file %s\n", filename);
 		exit(1);
 	}
 }

 /**
  * @brief Compress an image and immediately decompress it into a new image.
  *
  * @param input_image  Image to run through the codec.
  * @param xdim         Block dimension along X.
  * @param ydim         Block dimension along Y.
  * @param zdim         Block dimension along Z.
  * @param ewp          Error weighting parameters for the compressor.
  * @param decode_mode  Decode mode forwarded to encode_astc_image().
  * @param swz_encode   Swizzle applied when fetching input texels.
  * @param swz_decode   Swizzle applied when writing decompressed texels.
  * @param bitness      Forwarded to allocate_image() for the result image.
  * @param threadcount  Number of worker threads to use.
  *
  * @return The image obtained from allocate_image(), with the same dimensions
  *         as the input, after encode_astc_image() has written into it.
  */
 astc_codec_image *pack_and_unpack_astc_image(const astc_codec_image * input_image,
 											 int xdim,
 											 int ydim,
 											 int zdim,
 											 const error_weighting_params * ewp, astc_decode_mode decode_mode, swizzlepattern swz_encode, swizzlepattern swz_decode, int bitness, int threadcount)
 {
 	const int width = input_image->xsize;
 	const int height = input_image->ysize;
 	const int depth = input_image->zsize;

 	astc_codec_image *result = allocate_image(bitness, width, height, depth, 0);

 	// Block counts per axis, rounding partial blocks up; only used for the banner.
 	const int blocks_x = (width + xdim - 1) / xdim;
 	const int blocks_y = (height + ydim - 1) / ydim;
 	const int blocks_z = (depth + zdim - 1) / zdim;

 	if (!suppress_progress_counter)
 	{
 		printf("%d blocks to process...\n", blocks_x * blocks_y * blocks_z);
 	}

 	// No block buffer is supplied: with pack_and_unpack set to 1 the workers
 	// write decompressed texels directly into the result image.
 	encode_astc_image(input_image, result, xdim, ydim, zdim, ewp, decode_mode, swz_encode, swz_decode, NULL, 1, threadcount);

 	if (!suppress_progress_counter)
 	{
 		printf("\n");
 	}

 	return result;
 }

 /**
  * @brief Pick the 2D block size whose bitrate is closest to a target.
  *
  * A block always occupies 128 bits, so a block of x * y texels has a bitrate
  * of 128 / (x * y) bits per texel. When two candidates are equally close, the
  * one with the smaller x / y ratio wins.
  *
  * @param target_bitrate Desired bitrate in bits per texel.
  * @param x              Receives the chosen block width.
  * @param y              Receives the chosen block height.
  */
 void find_closest_blockdim_2d(float target_bitrate, int *x, int *y)
 {
 	// Candidate block sizes as { x, y }, listed in the order in which they
 	// are evaluated (ascending y, then ascending x).
 	static const int footprints[14][2] = {
 		{  4,  4 }, {  5,  4 },
 		{  5,  5 }, {  6,  5 }, {  8,  5 }, { 10,  5 },
 		{  6,  6 }, {  8,  6 }, { 10,  6 },
 		{  8,  8 }, { 10,  8 },
 		{ 10, 10 }, { 12, 10 },
 		{ 12, 12 }
 	};
 	const int footprint_count = (int) (sizeof(footprints) / sizeof(footprints[0]));

 	float smallest_error = 1000;
 	float aspect_of_smallest = 1;

 	for (int idx = 0; idx < footprint_count; idx++)
 	{
 		const int width = footprints[idx][0];
 		const int height = footprints[idx][1];

 		const float rate = 128.0f / (height * width);
 		const float rate_error = fabs(rate - target_bitrate);
 		const float shape = (float)width / height;

 		const int is_closer = rate_error < smallest_error;
 		const int wins_tie = (rate_error == smallest_error) && (shape < aspect_of_smallest);
 		if (!is_closer && !wins_tie)
 			continue;

 		*x = width;
 		*y = height;
 		smallest_error = rate_error;
 		aspect_of_smallest = shape;
 	}
 }

 /**
  * @brief Pick the 3D block size whose bitrate is closest to a target.
  *
  * A block always occupies 128 bits, so a block of x * y * z texels has a
  * bitrate of 128 / (x * y * z) bits per texel. When two candidates are
  * equally close, the one with the smaller sum x/y + y/z + x/z wins.
  *
  * @param target_bitrate Desired bitrate in bits per texel.
  * @param x              Receives the chosen block size along X.
  * @param y              Receives the chosen block size along Y.
  * @param z              Receives the chosen block size along Z.
  */
 void find_closest_blockdim_3d(float target_bitrate, int *x, int *y, int *z)
 {
 	// Candidate block sizes as { x, y, z }, listed in the order in which they
 	// are evaluated (ascending z, then y, then x).
 	static const int footprints[10][3] = {
 		{ 3, 3, 3 }, { 4, 3, 3 }, { 4, 4, 3 },
 		{ 4, 4, 4 }, { 5, 4, 4 }, { 5, 5, 4 },
 		{ 5, 5, 5 }, { 6, 5, 5 }, { 6, 6, 5 },
 		{ 6, 6, 6 }
 	};
 	const int footprint_count = (int) (sizeof(footprints) / sizeof(footprints[0]));

 	float smallest_error = 1000;
 	float aspect_of_smallest = 1;

 	for (int idx = 0; idx < footprint_count; idx++)
 	{
 		const int width = footprints[idx][0];
 		const int height = footprints[idx][1];
 		const int depth = footprints[idx][2];

 		const float rate = 128.0f / (depth * height * width);
 		const float rate_error = fabs(rate - target_bitrate);
 		const float shape = (float)width / height + (float)height / depth + (float)width / depth;

 		const int is_closer = rate_error < smallest_error;
 		const int wins_tie = (rate_error == smallest_error) && (shape < aspect_of_smallest);
 		if (!is_closer && !wins_tie)
 			continue;

 		*x = width;
 		*y = height;
 		*z = depth;
 		smallest_error = rate_error;
 		aspect_of_smallest = shape;
 	}
 }

 void compare_two_files(const char *filename1, const char *filename2, int low_fstop, int high_fstop, int psnrmode)
 {
 	int load_result1;
 	int load_result2;
 	astc_codec_image *img1 = astc_codec_load_image(filename1, 0, &load_result1);
 	if (load_result1 < 0)
 	{
 		printf("Failed to load file %s.\n", filename1);
 		exit(1);
 	}
 	astc_codec_image *img2 = astc_codec_load_image(filename2, 0, &load_result2);
 	if (load_result2 < 0)
 	{
 		printf("Failed to load file %s.\n", filename2);
 		exit(1);
 	}

 	int file1_components = load_result1 & 0x7;
 	int file2_components = load_result2 & 0x7;
 	int comparison_components = MAX(file1_components, file2_components);

 	int compare_hdr = 0;
 	if (load_result1 & 0x80)
 		compare_hdr = 1;
 	if (load_result2 & 0x80)
 		compare_hdr = 1;

 	compute_error_metrics(compare_hdr, comparison_components, img1, img2, low_fstop, high_fstop, psnrmode);
 }

 // Overlays a float with signed and unsigned 32-bit integers that share the
 // same storage.
 union if32
 {
 	float f;		// floating-point view
 	int32_t s;		// signed integer view of the same storage
 	uint32_t u;		// unsigned integer view of the same storage
 };

 // The ASTC codec is written with the assumption that a float threaded through
 // the "if32" union will in fact be stored and reloaded as a 32-bit IEEE-754
 // single-precision float, stored with round-to-nearest rounding. This is always
 // the case in an IEEE-754 compliant system; however, not every system is
 // IEEE-754 compliant in the first place. As such, we run a quick test to check
 // that this is actually the case (e.g. gcc on 32-bit x86 will typically fail
 // unless -msse2 -mfpmath=sse is specified).
 //
 // The test adds 12582912.0f (1.5 * 2^23) to 2.51f. At that magnitude adjacent
 // single-precision values are 1.0 apart, so a correctly rounded single-precision
 // sum is 12582915 and subtracting the constant again yields exactly 3.0f; if
 // the intermediate keeps extra precision the result stays near 2.51 and the
 // test fails. The test value is volatile so that it has to be read at run time.
 volatile float xprec_testval = 2.51f;
 void test_inappropriate_extended_precision(void)
 {
 	if32 p;
 	// Store the sum through the union member before reading it back.
 	p.f = xprec_testval + 12582912.0f;
 	float q = p.f - 12582912.0f;
 	if (q != 3.0f)
 	{
 		printf("Single-precision test failed; please recompile with proper IEEE-754 support.\n");
 		exit(1);
 	}
 }

 /**
  * @brief Debug routine that prints the contents of an image as hex words.
  *
  * The dump covers the image plus its padding border along X and Y; the
  * border is included along Z only when the image has more than one slice
  * (zsize != 1). Rows are separated by a newline and slices by a blank line.
  *
  * NOTE(review): each value is read as an unsigned int starting at
  * imagedata8[z][y][x]; confirm against the image layout that x is the
  * intended index for a 4-byte read.
  *
  * @param img Image to dump; its imagedata8 member is read.
  */
 void dump_image(astc_codec_image * img)
 {
 	printf("\n\nDumping image ( %d x %d x %d + %d)...\n\n", img->xsize, img->ysize, img->zsize, img->padding);

 	const int border = 2 * img->padding;
 	const int columns = img->xsize + border;
 	const int rows = img->ysize + border;
 	const int slices = (img->zsize == 1) ? img->zsize : img->zsize + border;

 	for (int slice = 0; slice < slices; slice++)
 	{
 		if (slice > 0)
 		{
 			printf("\n\n");
 		}

 		for (int row = 0; row < rows; row++)
 		{
 			if (row > 0)
 			{
 				printf("\n");
 			}

 			for (int col = 0; col < columns; col++)
 			{
 				printf("  0x%08X", *(unsigned int *)&img->imagedata8[slice][row][col]);
 			}
 		}
 	}

 	printf("\n\n");
 }

 int astc_main(int argc, char **argv)
 {
 	test_inappropriate_extended_precision();
 	// initialization routines
 	prepare_angular_tables();
 	build_quantization_mode_table();

 	start_time = get_time();

 	#ifdef DEBUG_CAPTURE_NAN
 		feenableexcept(FE_DIVBYZERO | FE_INVALID);
 	#endif

 	if (argc < 4)
 	{

 		printf(	"ASTC codec version 1.7\n"
 				"Copyright (C) 2011-2019 Arm Limited\n"
 				"All rights reserved. Use of this software is subject to terms of its license.\n\n"
 				"Usage:\n"
 				"Compress to texture file:\n"
 				"   %s -c <inputfile> <outputfile> <rate> [options]\n"
 				"Decompress from texture file:\n"
 				"   %s -d <inputfile> <outputfile> [options]\n"
 				"Compress, then immediately decompress to image:\n"
 				"   %s -t <inputfile> <outputfile> <rate> [options]\n"
 				"Compare two files (no compression or decompression):\n"
 				"   %s -compare <file1> <file2> [options]\n"
 				"\n"
 				"When encoding/decoding a texture for use with the LDR-SRGB submode,\n"
 				"use -cs, -ds, -ts instead of -c, -d, -t.\n"
 				"When encoding/decoding a texture for use with the LDR-linear submode,\n"
 				"use -cl, -dl, -tl instead of -c, -d, -t.\n"
 				"\n"
 				"For compression, the input file formats supported are\n"
 				" * PNG (*.png)\n"
 				" * Targa (*.tga)\n"
 				" * JPEG (*.jpg)\n"
 				" * GIF (*.gif) (non-animated only)\n"
 				" * BMP (*.bmp)\n"
 				" * Radiance HDR (*.hdr)\n"
 				" * Khronos Texture KTX (*.ktx)\n"
 				" * DirectDraw Surface DDS (*.dds)\n"
 				" * Half-Float-TGA (*.htga)\n"
 				" * OpenEXR (*.exr; only if 'exr_to_htga' is present in the path)\n"
 				"\n"
 				"For the KTX and DDS formats, the following subset of the format\n"
 				"features are supported; the subset is:\n"
 				" * 2D and 3D textures supported\n"
 				" * Uncompressed only, with unorm8, unorm16, float16 or float32 components\n"
 				" * R, RG, RGB, BGR, RGBA, BGRA, Luminance and Luminance-Alpha texel formats\n"
 				" * In case of multiple image in one file (mipmap, cube-faces, texture-arrays)\n"
 				"   the codec will read the first one and ignore the other ones.\n"
 				"\n"
 				"When using HDR or 3D textures, it is recommended to use the KTX or DDS formats.\n"
 				"Separate 2D image slices can be assembled into a 3D image using the -array option.\n"
 				"\n"
 				"The output file will be an ASTC compressed texture file (recommended filename\n"
 				"ending .astc)\n"
 				"\n"
 				"For decompression, the input file must be an ASTC compressed texture file;\n"
 				"the following formats are supported for output:\n"
 				" * Targa (*.tga)\n"
 				" * KTX (*.ktx)\n"
 				" * DDS (*.dds)\n"
 				" * Half-Float-TGA (*.htga)\n"
 				" * OpenEXR (*.exr; only if 'exr_to_htga' is present in the path)\n"
 				"\n"
 				"Targa is suitable only for 2D LDR images; for HDR and/or 3D images,\n"
 				"please use KTX or DDS.\n"
 				"\n"
 				"For compression, the <rate> argument specifies the bitrate or block\n"
 				"dimension to use. This argument can be specified in one of two ways:\n"
 				" * A decimal number (at least one actual decimal needed). This will cause \n"
 				"   the codec to interpret the number as a desired bitrate, and pick a block\n"
 				"   size to match that bitrate as closely as possible. For example, if you want a\n"
 				"   bitrate of 2.0 bits per texel, then specify the <rate> argument as 2.0\n"
 				" * A block size. This specifies the block dimensions to use along the\n"
 				"   X, Y (and for 3D textures) Z axes. The dimensions are separated with\n"
 				"   the character x, with no spaces. For 2D textures, the supported\n"
 				"   dimensions along each axis are picked from the set {4,5,6,8,10,12};\n"
 				"   for 3D textures, the supported dimensions are picked from the\n"
 				"   set {3,4,5,6}. For example, if you wish to encode a 2D texture using the\n"
 				"   10x6 block size (10 texels per block along the X axis, 6 texels per block\n"
 				"   along the Y axis, then specify the <rate> argument as 10x6 .\n"
 				"Some examples of supported 2D block sizes are:\n"
 				"  4x4 -> 8.0 bpp\n"
 				"  5x5 -> 5.12 bpp\n"
 				"  6x6 -> 3.56 bpp\n"
 				"  8x6 -> 2.67 bpp\n"
 				"  8x8 -> 2.0 bpp\n"
 				" 10x8 -> 1.6 bpp\n"
 				" 10x10 -> 1.28 bpp\n"
 				" 10x12 -> 1.07 bpp\n"
 				" 12x12 -> 0.89 bpp\n"
 				"If you try to specify a bitrate that can potentially map to multiple different\n"
 				"block sizes, the codec will choose the block size with the least lopsided\n"
 				"aspect ratio (e.g. if you specify 2.67, then the codec will choose the\n"
 				"8x6 block size, not 12x4)\n"
 				"\n"
 				"Below is a description of all the available options. Most of them make sense\n"
 				"for encoding only, however there are some that affect decoding as well\n"
 				"(such as -dsw and the normal-presets)\n"
 				"\n"
 				"\n"
 				"Built-in error-weighting Presets:\n"
 				"---------------------------------\n"
 				"The presets provide easy-to-use combinations of encoding options that\n"
 				"are designed for use with certain commonly-occurring kinds of\n"
 				"textures.\n"
 				"\n"
 				" -normal_psnr\n"
 				"      For encoding, assume that the input texture is a normal map with the\n"
 				"      X and Y components of the actual normals in the Red and Green\n"
 				"      color channels. The codec will then move the 2nd component to Alpha,\n"
 				"      and apply an error-weighting function based on angular error.\n"
 				"\n"
 				"      It is possible to use this preset with texture decoding as well,\n"
 				"      in which case it will expand the normal map from 2 to 3 components\n"
 				"      after the actual decoding.\n"
 				"\n"
 				"      The -normal_psnr preset as a whole is equivalent to the options\n"
 				"      \"-rn -esw rrrg -dsw raz1 -ch 1 0 0 1 -oplimit 1000 -mincorrel 0.99\" .\n"
 				"\n"
 				" -normal_percep\n"
 				"      Similar to -normal_psnr, except that it tries to optimize the normal\n"
 				"      map for best possible perceptual results instead of just maximizing\n"
 				"      angular PSNR.\n"
 				"      The -normal_percep preset as a whole is equivalent to the options\n"
 				"      \"-normal_psnr -b 2.5 -v 3 1 1 0 50 0 -va 1 1 0 50 -dblimit 60\" .\n"
 				"\n"
 				" -mask\n"
 				"      Assume that the input texture is a texture that contains\n"
 				"      unrelated content in its various color channels, and where\n"
 				"      it is undesirable for errors in one channel to affect\n"
 				"      the other channels.\n"
 				"      Equivalent to \"-v 3 1 1 0 25 0.03 -va 0 25\" .\n"
 				"\n"
 				" -alphablend\n"
 				"      Assume that the input texture is an RGB-alpha texture where\n"
 				"      the alpha component is used to represent opacity.\n"
 				"      (0=fully transparent, 1=fully opaque)\n"
 				"      Equivalent to \"-a 1\" .\n"
 				"\n"
 				" -hdr\n"
 				"      Assume that the input texture is an HDR texture. If an alpha channel is\n"
 				"      present, it is treated as an LDR channel (e.g. opacity)\n"
 				"      Optimize for 4th-root error for the color and linear error for the alpha.\n"
 				"      Equivalent to\n"
 				"          \"-forcehdr_rgb -v 0 0.75 0 1 0 0 -va 0.02 1 0 0 -dblimit 999\"\n"
 				"\n"
 				" -hdra\n"
 				"      Assume that the input texture is an HDR texture, and optimize\n"
 				"      for 4th-root error. If an alpha channel is present, it is\n"
 				"      assumed to be HDR and optimized for 4th-root error as well.\n"
 				"      Equivalent to\n"
 				"          \"-forcehdr_rgba -v 0 0.75 0 1 0 0 -va 0.75 0 1 0 -dblimit 999\"\n"
 				"\n"
 				" -hdr_log\n"
 				" -hdra_log\n"
 				"      Assume that the input texture is an HDR texture, and optimize\n"
 				"      for logarithmic error. This should give better results than -hdr\n"
 				"      on metrics like \"logRMSE\" and \"mPSNR\", but the subjective\n"
 				"      quality (in particular block artifacts) is generally significantly worse\n"
 				"      than -hdr.\n"
 				"      \"-hdr_log\" is equivalent to\n"
 				"          \"-forcehdr_rgb -v 0 1 0 1 0 0 -va 0.02 1 0 0 -dblimit 999\"\n"
 				"      \"-hdra_log\" is equivalent to\n"
 				"          \"-forcehdr_rgba -v 0 1 0 1 0 0 -va 1 0 1 0 -dblimit 999\"\n"
 				"\n"
 				"\n"
 				"\n"
 				"Performance-quality tradeoff presets:\n"
 				"-------------------------------------\n"
 				"These are presets that provide different tradeoffs between encoding\n"
 				"performance and quality. Exactly one of these presets has to be specified\n"
 				"for encoding; if this is not done, the codec reports an error message.\n"
 				"\n"
 				" -veryfast\n"
 				"      Run codec in very-fast-mode; this generally results in substantial\n"
 				"      quality loss.\n"
 				"\n"
 				" -fast\n"
 				"      Run codec in fast-mode. This generally results in mild quality loss.\n"
 				"\n"
 				" -medium\n"
 				"      Run codec in medium-speed-mode.\n"
 				"\n"
 				" -thorough\n"
 				"     Run codec in thorough-mode. This should be sufficient to fix most\n"
 				"     cases where \"-medium\" provides inadequate quality.\n"
 				"\n"
 				" -exhaustive\n"
 				"      Run codec in exhaustive-mode. This usually produces only\n"
 				"      marginally better quality than \"-thorough\" while considerably\n"
 				"      increasing encode time.\n"
 				"\n"
 				"\n"
 				"Low-level error weighting options:\n"
 				"----------------------------------\n"
 				"These options provide low-level control of the error-weighting options\n"
 				"that the codec provides.\n"
 				"\n"
 				" -v <radius> <power> <baseweight> <avgscale> <stdevscale> <mixing-factor>\n"
 				"      Compute the per-texel relative error weighting for the RGB color\n"
 				"      channels as follows:\n"
 				"\n"
 				"       weight = 1 / (<baseweight> + <avgscale>\n"
 				"            * average^2 + <stdevscale> * stdev^2)\n"
 				"\n"
 				"      The average and stdev are computed as the average-value and the\n"
 				"      standard deviation across a neighborhood of each texel; the <radius>\n"
 				"      argument specifies how wide this neighborhood should be.\n"
 				"      If this option is given without -va, it affects the weighting of RGB\n"
 				"      color components only, while alpha is assigned the weight 1.0 .\n"
 				"\n"
 				"      The <mixing-factor> parameter is used to control the degree of mixing\n"
 				"      between color channels. Setting this parameter to 0 causes the average\n"
 				"      and stdev computation to be done completely separately for each color\n"
 				"      channel; setting it to 1 causes the results from the red, green and\n"
 				"      blue color channel to be combined into a single result that is applied\n"
 				"      to all three channels. It is possible to set the mixing factor\n"
 				"      to a value between 0 and 1 in order to obtain a result in-between.\n"
 				"\n"
 				"      The <power> argument is a power used to raise the values of the input\n"
 				"      pixels before computing average and stdev; e.g. a power of 0.5 causes\n"
 				"      the codec to take the square root of every input pixel value before\n"
 				"      computing the averages and standard deviations.\n"
 				"\n"
 				" -va <baseweight> <power> <avgscale> <stdevscale>\n"
 				"      Used together with -v; it computes a relative per-texel\n"
 				"      weighting for the alpha component based on average and standard\n"
 				"      deviation in the same manner as described for -v, but with its own\n"
 				"      <baseweight>, <power>, <avgscale> and <stdevscale> parameters.\n"
 				"\n"
 				" -a <radius>\n"
 				"      For textures with alpha channel, scale per-texel weights by\n"
 				"      alpha. The alpha value chosen for scaling of any particular texel\n"
 				"      is taken as an average across a neighborhood of the texel.\n"
 				"      The <radius> argument gives the radius of this neighborhood;\n"
 				"      a radius of 0 causes the texel's own alpha value to be used with\n"
 				"      no contribution from neighboring texels.\n"
 				"\n"
 				" -ch <red_weight> <green_weight> <blue_weight> <alpha_weight>\n"
 				"      Assign relative weight to each color channel.\n"
 				"      If this option is combined with any of the other options above,\n"
 				"      the other options are used to compute a weighting, then the \n"
 				"      weighting is multiplied by the weighting provided by this argument.\n"
 				"\n"
 				" -rn\n"
 				"      Assume that the red and alpha color channels (after swizzle)\n"
 				"      represent the X and Y components for a normal map,\n"
 				"      and scale the error weighting so as to match angular error as closely\n"
 				"      as possible. The reconstruction function for the Z component\n"
 				"      is assumed to be Z=sqrt(1 - X^2 - X^2).\n"
 				"\n"
 				" -b <weighting>\n"
 				"      Increase error weight for texels at compression-block edges\n"
 				"      and corners; the parameter specifies how much the weights are to be\n"
 				"      modified, with 0 giving no modification. Higher values should reduce\n"
 				"      block-artifacts, at the cost of worsening other artifacts.\n"
 				"\n"
 				"\n"
 				"Low-level performance-quality tradeoff options:\n"
 				"-----------------------------------------------\n"
 				"These options provide low-level control of the performance-quality tradeoffs\n"
 				"that the codec provides.\n"
 				"\n"
 				" -plimit <number>\n"
 				"      Test only <number> different partitions. Higher numbers give better\n"
 				"      quality at the expense of longer encode time; however large values tend\n"
 				"      to give diminishing returns. This parameter can be set to a\n"
 				"      number from 1 to %d. By default, this limit is set based on the active\n"
 				"      preset, as follows:\n"
 				"        -veryfast :  2\n"
 				"        -fast     :  4\n"
 				"        -medium   :  25\n"
 				"        -thorough :  100\n"
 				"        -exhaustive  : %d\n"
 				"\n"
 				" -dblimit <number>\n"
 				"      Stop compression work on a block as soon as the PSNR of the block,\n"
 				"      as measured in dB, exceeds this limit. Higher numbers give better\n"
 				"      quality at the expense of longer encode times. If not set explicitly,\n"
 				"      it is set based on the currently-active block size and preset, as listed\n"
 				"      below (where N is the number of texels per block):\n"
 				"\n"
 				"        -veryfast : dblimit = MAX( 53-19*log10(N), 70-35*log10(N) )\n"
 				"        -fast     : dblimit = MAX( 63-19*log10(N), 85-35*log10(N) )\n"
 				"        -medium   : dblimit = MAX( 70-19*log10(N), 95-35*log10(N) )\n"
 				"        -thorough   : dblimit = MAX( 77-19*log10(N), 105-35*log10(N) )\n"
 				"        -exhaustive : dblimit = 999\n"
 				"\n"
 				"      Note that the compressor is not actually guaranteed to reach these PSNR\n"
 				"      numbers for any given block; also, at the point where the compressor\n"
 				"      discovers that it has exceeded the dblimit, it may have exceeded it by\n"
 				"      a large amount, so it is still possible to get a PSNR value that is\n"
 				"      substantially higher than the dblimit would suggest.\n"
 				"\n"
 				"      This option is ineffective for HDR textures.\n"
 				"\n"
 				" -oplimit <factor>\n"
 				"      If the error term from encoding with 2 partitions is greater than the\n"
 				"      error term from encoding with 1 partition by more than the specified\n"
 				"      factor, then cut compression work short.\n"
 				"      By default, this factor is set based on the active preset, as follows:\n"
 				"        -veryfast : 1.0\n"
 				"        -fast     : 1.0\n"
 				"        -medium   : 1.2\n"
 				"        -thorough : 2.5\n"
 				"        -exhaustive  : 1000\n"
 				"      The codec will not apply this factor if the input texture is a normal\n"
 				"      map (content resembles a normal-map, or one of the -normal_* presets\n"
 				"      is used).\n"
 				"\n"
 				" -mincorrel <value>\n"
 				"      For each block, the codec will compute the correlation coefficients\n"
 				"      between any two color components; if no pair of colors have a\n"
 				"      correlation coefficient below the cutoff specified by this switch,\n"
 				"      the codec will abstain from trying the dual-weight-planes.\n"
 				"      By default, this factor is set based on the active preset, as follows:\n"
 				"        -veryfast : 0.5\n"
 				"        -fast     : 0.5\n"
 				"        -medium   : 0.75\n"
 				"        -thorough : 0.95\n"
 				"        -exhaustive  : 0.99\n"
 				"      If the input texture is a normal-map (content resembles a normal-map\n"
 				"      or one of the -normal_* presets are used) the codec will use a value\n"
 				"      of 0.99.\n"
 				"\n"
 				" -bmc <value>\n"
 				"      Cutoff on the set of block modes to use; the cutoff is a percentile\n"
 				"      of the block modes that are most commonly used. The value takes a value\n"
 				"      from 0 to 100, where 0 offers the highest speed and lowest quality,\n"
 				"      and 100 offers the highest quality and lowest speed.\n"
 				"      By default, this factor is set based on the active preset, as follows:\n"
 				"       -veryfast  : 25\n"
 				"       -fast      : 50\n"
 				"       -medium    : 75\n"
 				"       -thorough  : 95\n"
 				"       -exhaustive : 100\n"
 				"      This option is ineffective for 3D textures.\n"
 				"\n"
 				" -maxiters <value>\n"
 				"      Maximum number of refinement iterations to apply to colors and weights.\n"
 				"      Minimum value is 1; larger values give slight quality increase\n"
 				"      at expense of mild performance loss. By default, the iteration count is\n"
 				"      picked based on the active preset, as follows:\n"
 				"       -veryfast  : 1\n"
 				"       -fast      : 1\n"
 				"       -medium    : 2\n"
 				"       -thorough  : 4\n"
 				"       -exhaustive : 4\n"
 				"\n"
 				"\n"
 				"\n"
 				"Other options:\n"
 				"--------------\n"
 				"\n"
 				" -array <size>\n"
 				"      Loads a an array of 2D image slices as a 3D image. The filename given\n"
 				"      is used as a base, and decorated with _0, _1, up to _<size-1> prior\n"
 				"      to loading each slice. So -array 3 input.png would load input_0.png,\n"
 				"      input_1.png and input_2.png as slices at z=0,1,2 respectively.\n"
 				"\n"
 				" -forcehdr_rgb\n"
 				"      Force the use of HDR endpoint modes. By default, only LDR endpoint\n"
 				"      modes are used. If alpha is present, alpha is kept as LDR.\n"
 				" -forcehdr_rgba\n"
 				"      Force the use of HDR endpoint modes. By default, only LDR endpoint\n"
 				"      modes are used. If alpha is present, alpha is forced into HDR as well.\n"
 				"\n"
 				" -esw <swizzlepattern>\n"
 				"      Swizzle the color components before encoding. The swizzle pattern\n"
 				"      is specified as a 4-character string, where the characters specify\n"
 				"      which color component will end up in the Red, Green, Blue and Alpha\n"
 				"      channels before encoding takes place. The characters may be taken\n"
 				"      from the set (r,g,b,a,0,1), where r,g,b,a use color components from\n"
 				"      the input texture and 0,1 use the constant values 0 and 1.\n"
 				"\n"
 				"      As an example, if you have an input RGBA texture where you wish to\n"
 				"      switch around the R and G channels, as well as replacing the\n"
 				"      alpha channel with the constant value 1, a suitable swizzle\n"
 				"      option would be:\n"
 				"        -esw grb1\n"
 				"      Note that if -esw is used together with any of the\n"
 				"      error weighting functions, the swizzle is considered to be\n"
 				"      applied before the error weighting function.\n"
 				"\n"
 				" -dsw <swizzlepattern>\n"
 				"      Swizzle pattern to apply after decoding a texture. This pattern is\n"
 				"      specified in the same way as the pre-encoding swizzle pattern\n"
 				"      for the -sw switch. However, one additional character is supported,\n"
 				"      namely 'z' for constructing the third component of a normal map.\n"
 				"\n"
 				" -srgb\n"
 				"      Convert input image from sRGB to linear-RGB before encode; convert\n"
 				"      output image from linear-RGB to sRGB after decode. For encode, the\n"
 				"      transform is applied after swizzle; for decode, the transform\n"
 				"      is applied before swizzle.\n"
 				"\n"
 				" -j <numthreads>\n"
 				"      Run encoding with multithreading, using the specified number\n"
 				"      of threads. If not specified, the codec will autodetect the\n"
 				"      number of available logical CPUs and spawn one thread for each.\n"
 				"      Use \"-j 1\" if you wish to run the codec in single-thread mode.\n"
 				"\n"
 				" -silentmode\n"
 				"      Suppresses all output from the codec, except in case of errors.\n"
 				"      If this switch is not provided, the codec will display the encoding\n"
 				"      settings it uses and show a progress counter during encode.\n"
 				"\n"
 				" -time\n"
 				"      Displays time taken for entire run, together with time taken for\n"
 				"      coding step only. If requested, this is output even in -silentmode.\n"
 				"\n"
 				" -showpsnr\n"
 				"      In test mode (-t), displays PSNR difference between input and output\n"
 				"      images, in dB, even if -silentmode is specified. Works for LDR images\n"
 				"      only.\n"
 				"\n"
 				" -mpsnr <low> <high>\n"
 				"     Set the low and high f-stop values to use for the mPSNR error metric.\n"
 				"     Default is low=-10, high=10.\n"
 				"     The mPSNR error metric only applies to HDR textures.\n"
 				"     This option can be used together with -compare .\n"
 				"\n"
 				"\n"
 				"\n"
 				"Tips & tricks:\n"
 				"--------------"
 				"\n"
 				"ASTC, being a block-based format, is moderately prone to block artifacts.\n"
 				"If block artifacts are a problem when compressing a given texture,\n"
 				"adding some or all of following command-line options may help:\n"
 				" -b 1.8\n"
 				" -v 2 1 1 0 25 0.1\n"
 				" -va 1 1 0 25\n"
 				" -dblimit 60\n"
 				"The -b option is a general-purpose block-artifact reduction option. The\n"
 				"-v and -va options concentrate effort where smooth regions lie next to regions\n"
 				"with high detail (such regions are particularly prone to block artifacts\n"
 				"otherwise). The -dblimit option is sometimes also needed to reduce\n"
 				"block artifacts in regions with very smooth gradients.\n"
 				"\n"
 				"If a texture exhibits severe block artifacts in only some, but not all, of\n"
 				"the color channels (common problem with mask textures), then it may help\n"
 				"to use the -ch option to raise the weighting of the affected color channel(s).\n"
 				"For example, if the green color channel in particular suffers from block\n"
 				"artifacts, then using the commandline option\n"
 				" -ch 1 6 1 1\n"
 				"should improve the result significantly.\n"

 			   , argv[0], argv[0], argv[0], argv[0], PARTITION_COUNT, PARTITION_COUNT);

 		exit(1);
 	}

 	astc_decode_mode decode_mode = DECODE_HDR;
 	int opmode;					// 0=compress, 1=decompress, 2=do both, 4=compare
 	if (!strcmp(argv[1], "-c"))
 	{
 		opmode = 0;
 		decode_mode = DECODE_HDR;
 	}
 	else if (!strcmp(argv[1], "-d"))
 	{
 		opmode = 1;
 		decode_mode = DECODE_HDR;
 	}
 	else if (!strcmp(argv[1], "-t"))
 	{
 		opmode = 2;
 		decode_mode = DECODE_HDR;
 	}
 	else if (!strcmp(argv[1], "-cs"))
 	{
 		opmode = 0;
 		decode_mode = DECODE_LDR_SRGB;
 	}
 	else if (!strcmp(argv[1], "-ds"))
 	{
 		opmode = 1;
 		decode_mode = DECODE_LDR_SRGB;
 	}
 	else if (!strcmp(argv[1], "-ts"))
 	{
 		opmode = 2;
 		decode_mode = DECODE_LDR_SRGB;
 	}
 	else if (!strcmp(argv[1], "-cl"))
 	{
 		opmode = 0;
 		decode_mode = DECODE_LDR;
 	}
 	else if (!strcmp(argv[1], "-dl"))
 	{
 		opmode = 1;
 		decode_mode = DECODE_LDR;
 	}
 	else if (!strcmp(argv[1], "-tl"))
 	{
 		opmode = 2;
 		decode_mode = DECODE_LDR;
 	}
 	else if (!strcmp(argv[1], "-compare"))
 	{
 		opmode = 4;
 		decode_mode = DECODE_HDR;
 	}
 	else
 	{
 		printf("Unrecognized operation\n");
 		exit(1);
 	}

 	int array_size = 1;

 	const char *input_filename = argv[2];
 	const char *output_filename = argv[3];

 	int silentmode = 0;
 	int timemode = 0;
 	int psnrmode = 0;

 	error_weighting_params ewp;

 	ewp.rgb_power = 1.0f;
 	ewp.alpha_power = 1.0f;
 	ewp.rgb_base_weight = 1.0f;
 	ewp.alpha_base_weight = 1.0f;
 	ewp.rgb_mean_weight = 0.0f;
 	ewp.rgb_stdev_weight = 0.0f;
 	ewp.alpha_mean_weight = 0.0f;
 	ewp.alpha_stdev_weight = 0.0f;

 	ewp.rgb_mean_and_stdev_mixing = 0.0f;
 	ewp.mean_stdev_radius = 0;
 	ewp.enable_rgb_scale_with_alpha = 0;
 	ewp.alpha_radius = 0;

 	ewp.block_artifact_suppression = 0.0f;
 	ewp.rgba_weights[0] = 1.0f;
 	ewp.rgba_weights[1] = 1.0f;
 	ewp.rgba_weights[2] = 1.0f;
 	ewp.rgba_weights[3] = 1.0f;
 	ewp.ra_normal_angular_scale = 0;

 	swizzlepattern swz_encode = { 0, 1, 2, 3 };
 	swizzlepattern swz_decode = { 0, 1, 2, 3 };

 	int thread_count = 0;		// default value
 	int thread_count_autodetected = 0;

 	int preset_has_been_set = 0;

 	int plimit_autoset = -1;
 	int plimit_user_specified = -1;
 	int plimit_set_by_user = 0;

 	float dblimit_autoset_2d = 0.0;
 	float dblimit_autoset_3d = 0.0;
 	float dblimit_user_specified = 0.0;
 	int dblimit_set_by_user = 0;

 	float oplimit_autoset = 0.0;
 	float oplimit_user_specified = 0.0;
 	int oplimit_set_by_user = 0;

 	float mincorrel_autoset = 0.0;
 	float mincorrel_user_specified = 0.0;
 	int mincorrel_set_by_user = 0;

 	float bmc_user_specified = 0.0;
 	float bmc_autoset = 0.0;
 	int bmc_set_by_user = 0;

 	int maxiters_user_specified = 0;
 	int maxiters_autoset = 0;
 	int maxiters_set_by_user = 0;

 	int pcdiv = 1;

 	int xdim_2d = 0;
 	int ydim_2d = 0;
 	int xdim_3d = 0;
 	int ydim_3d = 0;
 	int zdim_3d = 0;

 	int target_bitrate_set = 0;
 	float target_bitrate = 0;

 	int print_block_mode_histogram = 0;

 	float log10_texels_2d = 0.0f;
 	float log10_texels_3d = 0.0f;

 	int low_fstop = -10;
 	int high_fstop = 10;

 	// parse the command line's encoding options.
 	int argidx;
 	if (opmode == 0 || opmode == 2)
 	{
 		if (argc < 5)
 		{
 			printf("Cannot encode without specifying blocksize\n");
 			exit(1);
 		}

 		if (strchr(argv[4], '.') != NULL)
 		{
 			target_bitrate = static_cast < float >(atof(argv[4]));
 			target_bitrate_set = 1;
 			find_closest_blockdim_2d(target_bitrate, &xdim_2d, &ydim_2d);
 			find_closest_blockdim_3d(target_bitrate, &xdim_3d, &ydim_3d, &zdim_3d);
 		}
 		else
 		{
 			int dimensions = sscanf(argv[4], "%dx%dx%d", &xdim_3d, &ydim_3d, &zdim_3d);
 			switch (dimensions)
 			{
 			case 0:
 			case 1:
 				// failed to parse the blocksize argument at all.
 				printf("Blocksize not specified\n");
 				exit(1);
 			case 2:
 				{
 					zdim_3d = 1;

 					// Check 2D constraints
 					if(!(xdim_3d ==4 || xdim_3d == 5 || xdim_3d == 6 || xdim_3d == 8 || xdim_3d == 10 || xdim_3d == 12) ||
 					   !(ydim_3d ==4 || ydim_3d == 5 || ydim_3d == 6 || ydim_3d == 8 || ydim_3d == 10 || ydim_3d == 12) )
 					{
 						printf("Block dimensions %d x %d unsupported\n", xdim_3d, ydim_3d);
 						exit(1);
 					}

 					int is_legal_2d = (xdim_3d==ydim_3d) || (xdim_3d==ydim_3d+1) || ((xdim_3d==ydim_3d+2) && !(xdim_3d==6 && ydim_3d==4)) ||
 									  (xdim_3d==8 && ydim_3d==5) || (xdim_3d==10 && ydim_3d==5) || (xdim_3d==10 && ydim_3d==6);

 					if(!is_legal_2d)
 					{
 						printf("Block dimensions %d x %d disallowed\n", xdim_3d, ydim_3d);
 						exit(1);
 					}
 				}
 				break;
 			default:
 				{
 					// Check 3D constraints
 					if(xdim_3d < 3 || xdim_3d > 6 || ydim_3d < 3 || ydim_3d > 6 || zdim_3d < 3 || zdim_3d > 6)
 					{
 						printf("Block dimensions %d x %d x %d unsupported\n", xdim_3d, ydim_3d, zdim_3d);
 						exit(1);
 					}

 					int is_legal_3d = ((xdim_3d==ydim_3d)&&(ydim_3d==zdim_3d)) || ((xdim_3d==ydim_3d+1)&&(ydim_3d==zdim_3d)) || ((xdim_3d==ydim_3d)&&(ydim_3d==zdim_3d+1));

 					if(!is_legal_3d)
 					{
 						printf("Block dimensions %d x %d x %d disallowed\n", xdim_3d, ydim_3d, zdim_3d);
 						exit(1);
 					}
 				}
 				break;
 			}

 			xdim_2d = xdim_3d;
 			ydim_2d = ydim_3d;
 		}

 		log10_texels_2d = log((float)(xdim_2d * ydim_2d)) / log(10.0f);
 		log10_texels_3d = log((float)(xdim_3d * ydim_3d * zdim_3d)) / log(10.0f);
 		argidx = 5;
 	}
 	else
 	{
 		// for decode and comparison, block size is not needed.
 		argidx = 4;
 	}

 	while (argidx < argc)
 	{
 		if (!strcmp(argv[argidx], "-silentmode"))
 		{
 			argidx++;
 			silentmode = 1;
 			suppress_progress_counter = 1;
 		}
 		else if (!strcmp(argv[argidx], "-time"))
 		{
 			argidx++;
 			timemode = 1;
 		}
 		else if (!strcmp(argv[argidx], "-showpsnr"))
 		{
 			argidx++;
 			psnrmode = 1;
 		}
 		else if (!strcmp(argv[argidx], "-v"))
 		{
 			argidx += 7;
 			if (argidx > argc)
 			{
 				printf("-v switch with less than 6 arguments, quitting\n");
 				exit(1);
 			}
 			ewp.mean_stdev_radius = atoi(argv[argidx - 6]);
 			ewp.rgb_power = static_cast < float >(atof(argv[argidx - 5]));
 			ewp.rgb_base_weight = static_cast < float >(atof(argv[argidx - 4]));
 			ewp.rgb_mean_weight = static_cast < float >(atof(argv[argidx - 3]));
 			ewp.rgb_stdev_weight = static_cast < float >(atof(argv[argidx - 2]));
 			ewp.rgb_mean_and_stdev_mixing = static_cast < float >(atof(argv[argidx - 1]));
 		}
 		else if (!strcmp(argv[argidx], "-va"))
 		{
 			argidx += 5;
 			if (argidx > argc)
 			{
 				printf("-va switch with less than 4 arguments, quitting\n");
 				exit(1);
 			}
 			ewp.alpha_power = static_cast < float >(atof(argv[argidx - 4]));
 			ewp.alpha_base_weight = static_cast < float >(atof(argv[argidx - 3]));
 			ewp.alpha_mean_weight = static_cast < float >(atof(argv[argidx - 2]));
 			ewp.alpha_stdev_weight = static_cast < float >(atof(argv[argidx - 1]));
 		}
 		else if (!strcmp(argv[argidx], "-ch"))
 		{
 			argidx += 5;
 			if (argidx > argc)
 			{
 				printf("-ch switch with less than 4 arguments\n");
 				exit(1);
 			}
 			ewp.rgba_weights[0] = static_cast < float >(atof(argv[argidx - 4]));
 			ewp.rgba_weights[1] = static_cast < float >(atof(argv[argidx - 3]));
 			ewp.rgba_weights[2] = static_cast < float >(atof(argv[argidx - 2]));
 			ewp.rgba_weights[3] = static_cast < float >(atof(argv[argidx - 1]));
 		}
 		else if (!strcmp(argv[argidx], "-a"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-a switch with no argument\n");
 				exit(1);
 			}
 			ewp.enable_rgb_scale_with_alpha = 1;
 			ewp.alpha_radius = atoi(argv[argidx - 1]);
 		}
 		else if (!strcmp(argv[argidx], "-rn"))
 		{
 			argidx++;
 			ewp.ra_normal_angular_scale = 1;
 		}
 		else if (!strcmp(argv[argidx], "-b"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-b switch with no argument\n");
 				exit(1);
 			}
 			ewp.block_artifact_suppression = static_cast < float >(atof(argv[argidx - 1]));
 		}
 		else if (!strcmp(argv[argidx], "-esw"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-esw switch with no argument\n");
 				exit(1);
 			}

 			if (strlen(argv[argidx - 1]) != 4)
 			{
 				printf("Swizzle pattern for the -esw switch must have exactly 4 characters\n");
 				exit(1);
 			}

 			int swizzle_components[4];
 			for (int i = 0; i < 4; i++)
 				switch (argv[argidx - 1][i])
 				{
 				case 'r':
 					swizzle_components[i] = 0;
 					break;
 				case 'g':
 					swizzle_components[i] = 1;
 					break;
 				case 'b':
 					swizzle_components[i] = 2;
 					break;
 				case 'a':
 					swizzle_components[i] = 3;
 					break;
 				case '0':
 					swizzle_components[i] = 4;
 					break;
 				case '1':
 					swizzle_components[i] = 5;
 					break;
 				default:
 					printf("Character '%c' is not a valid swizzle-character\n", argv[argidx - 1][i]);
 					exit(1);
 				}
 			swz_encode.r = swizzle_components[0];
 			swz_encode.g = swizzle_components[1];
 			swz_encode.b = swizzle_components[2];
 			swz_encode.a = swizzle_components[3];
 		}
 		else if (!strcmp(argv[argidx], "-dsw"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-dsw switch with no argument\n");
 				exit(1);
 			}

 			if (strlen(argv[argidx - 1]) != 4)
 			{
 				printf("Swizzle pattern for the -dsw switch must have exactly 4 characters\n");
 				exit(1);
 			}

 			int swizzle_components[4];
 			for (int i = 0; i < 4; i++)
 			{
 				switch (argv[argidx - 1][i])
 				{
 				case 'r':
 					swizzle_components[i] = 0;
 					break;
 				case 'g':
 					swizzle_components[i] = 1;
 					break;
 				case 'b':
 					swizzle_components[i] = 2;
 					break;
 				case 'a':
 					swizzle_components[i] = 3;
 					break;
 				case '0':
 					swizzle_components[i] = 4;
 					break;
 				case '1':
 					swizzle_components[i] = 5;
 					break;
 				case 'z':
 					swizzle_components[i] = 6;
 					break;
 				default:
 					printf("Character '%c' is not a valid swizzle-character\n", argv[argidx - 1][i]);
 					exit(1);
 				}
 			}
 			swz_decode.r = swizzle_components[0];
 			swz_decode.g = swizzle_components[1];
 			swz_decode.b = swizzle_components[2];
 			swz_decode.a = swizzle_components[3];
 		}
 		// presets begin here
 		else if (!strcmp(argv[argidx], "-normal_psnr"))
 		{
 			argidx++;
 			ewp.rgba_weights[0] = 1.0f;
 			ewp.rgba_weights[1] = 0.0f;
 			ewp.rgba_weights[2] = 0.0f;
 			ewp.rgba_weights[3] = 1.0f;
 			ewp.ra_normal_angular_scale = 1;
 			swz_encode.r = 0;	// r <- red
 			swz_encode.g = 0;	// g <- red
 			swz_encode.b = 0;	// b <- red
 			swz_encode.a = 1;	// a <- green
 			swz_decode.r = 0;	// r <- red
 			swz_decode.g = 3;	// g <- alpha
 			swz_decode.b = 6;	// b <- reconstruct
 			swz_decode.a = 5;	// 1.0

 			oplimit_user_specified = 1000.0f;
 			oplimit_set_by_user = 1;
 			mincorrel_user_specified = 0.99f;
 			mincorrel_set_by_user = 1;
 		}
 		else if (!strcmp(argv[argidx], "-normal_percep"))
 		{
 			argidx++;
 			ewp.rgba_weights[0] = 1.0f;
 			ewp.rgba_weights[1] = 0.0f;
 			ewp.rgba_weights[2] = 0.0f;
 			ewp.rgba_weights[3] = 1.0f;
 			ewp.ra_normal_angular_scale = 1;
 			swz_encode.r = 0;	// r <- red
 			swz_encode.g = 0;	// g <- red
 			swz_encode.b = 0;	// b <- red
 			swz_encode.a = 1;	// a <- green
 			swz_decode.r = 0;	// r <- red
 			swz_decode.g = 3;	// g <- alpha
 			swz_decode.b = 6;	// b <- reconstruct
 			swz_decode.a = 5;	// 1.0

 			oplimit_user_specified = 1000.0f;
 			oplimit_set_by_user = 1;
 			mincorrel_user_specified = 0.99f;
 			mincorrel_set_by_user = 1;

 			dblimit_user_specified = 999;
 			dblimit_set_by_user = 1;

 			ewp.block_artifact_suppression = 1.8f;
 			ewp.mean_stdev_radius = 3;
 			ewp.rgb_mean_weight = 0;
 			ewp.rgb_stdev_weight = 50;
 			ewp.rgb_mean_and_stdev_mixing = 0.0;
 			ewp.alpha_mean_weight = 0;
 			ewp.alpha_stdev_weight = 50;
 		}
 		else if (!strcmp(argv[argidx], "-mask"))
 		{
 			argidx++;
 			ewp.mean_stdev_radius = 3;
 			ewp.rgb_mean_weight = 0.0f;
 			ewp.rgb_stdev_weight = 25.0f;
 			ewp.rgb_mean_and_stdev_mixing = 0.03f;
 			ewp.alpha_mean_weight = 0.0f;
 			ewp.alpha_stdev_weight = 25.0f;
 		}
 		else if (!strcmp(argv[argidx], "-alphablend"))
 		{
 			argidx++;
 			ewp.enable_rgb_scale_with_alpha = 1;
 			ewp.alpha_radius = 1;
 		}
 		else if (!strcmp(argv[argidx], "-hdra"))
 		{
 			if(decode_mode != DECODE_HDR)
 			{
 				printf("The option -hdra is only available in HDR mode\n");
 				exit(1);
 			}

 			argidx++;
 			ewp.mean_stdev_radius = 0;
 			ewp.rgb_power = 0.75;
 			ewp.rgb_base_weight = 0;
 			ewp.rgb_mean_weight = 1;
 			ewp.alpha_power = 0.75;
 			ewp.alpha_base_weight = 0;
 			ewp.alpha_mean_weight = 1;
 			rgb_force_use_of_hdr = 1;
 			alpha_force_use_of_hdr = 1;
 			dblimit_user_specified = 999;
 			dblimit_set_by_user = 1;
 		}
 		else if (!strcmp(argv[argidx], "-hdr"))
 		{
 			if(decode_mode != DECODE_HDR)
 			{
 				printf("The option -hdr is only available in HDR mode\n");
 				exit(1);
 			}

 			argidx++;
 			ewp.mean_stdev_radius = 0;
 			ewp.rgb_power = 0.75;
 			ewp.rgb_base_weight = 0;
 			ewp.rgb_mean_weight = 1;
 			ewp.alpha_base_weight = 0.05f;
 			rgb_force_use_of_hdr = 1;
 			alpha_force_use_of_hdr = 0;
 			dblimit_user_specified = 999;
 			dblimit_set_by_user = 1;
 		}
 		else if (!strcmp(argv[argidx], "-hdra_log"))
 		{
 			if(decode_mode != DECODE_HDR)
 			{
 				printf("The option -hdra_log is only available in HDR mode\n");
 				exit(1);
 			}

 			argidx++;
 			ewp.mean_stdev_radius = 0;
 			ewp.rgb_power = 1;
 			ewp.rgb_base_weight = 0;
 			ewp.rgb_mean_weight = 1;
 			ewp.alpha_power = 1;
 			ewp.alpha_base_weight = 0;
 			ewp.alpha_mean_weight = 1;
 			rgb_force_use_of_hdr = 1;
 			alpha_force_use_of_hdr = 1;
 			dblimit_user_specified = 999;
 			dblimit_set_by_user = 1;
 		}
 		else if (!strcmp(argv[argidx], "-hdr_log"))
 		{
 			argidx++;
 			ewp.mean_stdev_radius = 0;
 			ewp.rgb_power = 1;
 			ewp.rgb_base_weight = 0;
 			ewp.rgb_mean_weight = 1;
 			ewp.alpha_base_weight = 0.05f;
 			rgb_force_use_of_hdr = 1;
 			alpha_force_use_of_hdr = 0;
 			dblimit_user_specified = 999;
 			dblimit_set_by_user = 1;
 		}
 		// presets end here
 		else if (!strcmp(argv[argidx], "-forcehdr_rgb"))
 		{
 			if(decode_mode != DECODE_HDR)
 			{
 				printf("The option -forcehdr_rgb is only available in HDR mode\n");
 				exit(1);
 			}

 			argidx++;
 			rgb_force_use_of_hdr = 1;
 		}
 		else if (!strcmp(argv[argidx], "-forcehdr_rgba"))
 		{
 			if(decode_mode != DECODE_HDR)
 			{
 				printf("The option -forcehdr_rgbs is only available in HDR mode\n");
 				exit(1);
 			}

 			argidx++;
 			rgb_force_use_of_hdr = 1;
 			alpha_force_use_of_hdr = 1;
 		}
 		else if (!strcmp(argv[argidx], "-bmc"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-bmc switch with no argument\n");
 				exit(1);
 			}
 			float cutoff = (float)atof(argv[argidx - 1]);
 			if (cutoff > 100 || !(cutoff >= 0))
 				cutoff = 100;
 			bmc_user_specified = cutoff;
 			bmc_set_by_user = 1;
 		}
 		else if (!strcmp(argv[argidx], "-plimit"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-plimit switch with no argument\n");
 				exit(1);
 			}
 			plimit_user_specified = atoi(argv[argidx - 1]);
 			plimit_set_by_user = 1;
 		}
 		else if (!strcmp(argv[argidx], "-dblimit"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-dblimit switch with no argument\n");
 				exit(1);
 			}
 			dblimit_user_specified = static_cast < float >(atof(argv[argidx - 1]));
 			dblimit_set_by_user = 1;
 		}
 		else if (!strcmp(argv[argidx], "-oplimit"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-oplimit switch with no argument\n");
 				exit(1);
 			}
 			oplimit_user_specified = static_cast < float >(atof(argv[argidx - 1]));
 			oplimit_set_by_user = 1;
 		}
 		else if (!strcmp(argv[argidx], "-mincorrel"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-mincorrel switch with no argument\n");
 				exit(1);
 			}
 			mincorrel_user_specified = static_cast < float >(atof(argv[argidx - 1]));
 			mincorrel_set_by_user = 1;
 		}
 		else if (!strcmp(argv[argidx], "-maxiters"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-maxiters switch with no argument\n");
 				exit(1);
 			}
 			maxiters_user_specified = atoi(argv[argidx - 1]);
 			maxiters_set_by_user = 1;
 		}
 		else if (!strcmp(argv[argidx], "-veryfast"))
 		{
 			argidx++;
 			plimit_autoset = 2;
 			oplimit_autoset = 1.0;
 			dblimit_autoset_2d = MAX(70 - 35 * log10_texels_2d, 53 - 19 * log10_texels_2d);
 			dblimit_autoset_3d = MAX(70 - 35 * log10_texels_3d, 53 - 19 * log10_texels_3d);
 			bmc_autoset = 25;
 			mincorrel_autoset = 0.5;
 			maxiters_autoset = 1;

 			switch (ydim_2d)
 			{
 			case 4:
 				pcdiv = 240;
 				break;
 			case 5:
 				pcdiv = 56;
 				break;
 			case 6:
 				pcdiv = 64;
 				break;
 			case 8:
 				pcdiv = 47;
 				break;
 			case 10:
 				pcdiv = 36;
 				break;
 			case 12:
 				pcdiv = 30;
 				break;
 			default:
 				pcdiv = 30;
 				break;
 			}
 			preset_has_been_set++;
 		}
 		else if (!strcmp(argv[argidx], "-fast"))
 		{
 			argidx++;
 			plimit_autoset = 4;
 			oplimit_autoset = 1.0;
 			mincorrel_autoset = 0.5;
 			dblimit_autoset_2d = MAX(85 - 35 * log10_texels_2d, 63 - 19 * log10_texels_2d);
 			dblimit_autoset_3d = MAX(85 - 35 * log10_texels_3d, 63 - 19 * log10_texels_3d);
 			bmc_autoset = 50;
 			maxiters_autoset = 1;

 			switch (ydim_2d)
 			{
 			case 4:
 				pcdiv = 60;
 				break;
 			case 5:
 				pcdiv = 27;
 				break;
 			case 6:
 				pcdiv = 30;
 				break;
 			case 8:
 				pcdiv = 24;
 				break;
 			case 10:
 				pcdiv = 16;
 				break;
 			case 12:
 				pcdiv = 20;
 				break;
 			default:
 				pcdiv = 20;
 				break;
 			};
 			preset_has_been_set++;
 		}
 		else if (!strcmp(argv[argidx], "-medium"))
 		{
 			argidx++;
 			plimit_autoset = 25;
 			oplimit_autoset = 1.2f;
 			mincorrel_autoset = 0.75f;
 			dblimit_autoset_2d = MAX(95 - 35 * log10_texels_2d, 70 - 19 * log10_texels_2d);
 			dblimit_autoset_3d = MAX(95 - 35 * log10_texels_3d, 70 - 19 * log10_texels_3d);
 			bmc_autoset = 75;
 			maxiters_autoset = 2;

 			switch (ydim_2d)
 			{
 			case 4:
 				pcdiv = 25;
 				break;
 			case 5:
 				pcdiv = 15;
 				break;
 			case 6:
 				pcdiv = 15;
 				break;
 			case 8:
 				pcdiv = 10;
 				break;
 			case 10:
 				pcdiv = 8;
 				break;
 			case 12:
 				pcdiv = 6;
 				break;
 			default:
 				pcdiv = 6;
 				break;
 			};
 			preset_has_been_set++;
 		}
 		else if (!strcmp(argv[argidx], "-thorough"))
 		{
 			argidx++;
 			plimit_autoset = 100;
 			oplimit_autoset = 2.5f;
 			mincorrel_autoset = 0.95f;
 			dblimit_autoset_2d = MAX(105 - 35 * log10_texels_2d, 77 - 19 * log10_texels_2d);
 			dblimit_autoset_3d = MAX(105 - 35 * log10_texels_3d, 77 - 19 * log10_texels_3d);
 			bmc_autoset = 95;
 			maxiters_autoset = 4;

 			switch (ydim_2d)
 			{
 			case 4:
 				pcdiv = 12;
 				break;
 			case 5:
 				pcdiv = 7;
 				break;
 			case 6:
 				pcdiv = 7;
 				break;
 			case 8:
 				pcdiv = 5;
 				break;
 			case 10:
 				pcdiv = 4;
 				break;
 			case 12:
 				pcdiv = 3;
 				break;
 			default:
 				pcdiv = 3;
 				break;
 			};
 			preset_has_been_set++;
 		}
 		else if (!strcmp(argv[argidx], "-exhaustive"))
 		{
 			argidx++;
 			plimit_autoset = PARTITION_COUNT;
 			oplimit_autoset = 1000.0f;
 			mincorrel_autoset = 0.99f;
 			dblimit_autoset_2d = 999.0f;
 			dblimit_autoset_3d = 999.0f;
 			bmc_autoset = 100;
 			maxiters_autoset = 4;

 			preset_has_been_set++;
 			switch (ydim_2d)
 			{
 			case 4:
 				pcdiv = 3;
 				break;
 			case 5:
 				pcdiv = 1;
 				break;
 			case 6:
 				pcdiv = 1;
 				break;
 			case 8:
 				pcdiv = 1;
 				break;
 			case 10:
 				pcdiv = 1;
 				break;
 			case 12:
 				pcdiv = 1;
 				break;
 			default:
 				pcdiv = 1;
 				break;
 			}
 		}
 		else if (!strcmp(argv[argidx], "-j"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-j switch with no argument\n");
 				exit(1);
 			}
 			thread_count = atoi(argv[argidx - 1]);
 		}
 		else if (!strcmp(argv[argidx], "-srgb"))
 		{
 			argidx++;
 			perform_srgb_transform = 1;
 			dblimit_user_specified = 60;
 			dblimit_set_by_user = 1;
 		}
 		else if (!strcmp(argv[argidx], "-mpsnr"))
 		{
 			argidx += 3;
 			if (argidx > argc)
 			{
 				printf("-mpsnr switch with less than 2 arguments\n");
 				exit(1);
 			}
 			low_fstop = atoi(argv[argidx - 2]);
 			high_fstop = atoi(argv[argidx - 1]);
 			if (high_fstop < low_fstop)
 			{
 				printf("For -mpsnr switch, the <low> argument cannot be greater than the\n" "high argument.\n");
 				exit(1);
 			}
 		}
 		else if (!strcmp(argv[argidx], "-diag"))
 		{
 			argidx += 2;
 			if (argidx > argc)
 			{
 				printf("-diag switch with no argument\n");
 				exit(1);
 			}

 			#ifdef DEBUG_PRINT_DIAGNOSTICS
 				diagnostics_tile = atoi(argv[argidx - 1]);
 			#else
 				printf("-diag switch given, but codec has been compiled without\n" "DEBUG_PRINT_DIAGNOSTICS enabled; please recompile.\n");
 				exit(1);
 			#endif
 		}
 		else if (!strcmp(argv[argidx], "-bmstat"))
 		{
 			argidx++;
 			print_block_mode_histogram = 1;
 		}
 		else if (!strcmp(argv[argidx], "-pte"))
 		{
 			argidx++;
 			print_tile_errors = 1;
 		}
 		else if (!strcmp(argv[argidx], "-stats"))
 		{
 			argidx++;
 			print_statistics = 1;
 		}
 		// Option: Encode a 3D image from an array of 2D images.
 		else if (!strcmp(argv[argidx], "-array"))
 		{
 			// Only supports compressing (not decompressing or comparison).
 			if (opmode != 0)
 			{
 				printf("-array switch given when not compressing files - decompression and comparison of arrays not supported.\n");
 				exit(1);
 			}

 			// Image depth must be specified.
 			if (argidx + 2 > argc)
 			{
 				printf("-array switch given, but no array size (image depth) given.\n");
 				exit(1);
 			}
 			argidx++;

 			// Read array size (image depth).
 			if (!sscanf(argv[argidx], "%d", &array_size) || array_size == 0)
 			{
 				printf("Invalid array size (image depth) given with -array option: \"%s\".\n", argv[argidx]);
 				exit(1);
 			}
 			argidx++;
 		}
 		else
 		{
 			printf("Commandline argument \"%s\" not recognized\n", argv[argidx]);
 			exit(1);
 		}
 	}

 	if (opmode == 4)
 	{
 		compare_two_files(input_filename, output_filename, low_fstop, high_fstop, psnrmode);
 		exit(0);
 	}

 	float texel_avg_error_limit_2d = 0.0f;
 	float texel_avg_error_limit_3d = 0.0f;

 	if (opmode == 0 || opmode == 2)
 	{
 		// if encode, process the parsed command line values

 		if (preset_has_been_set != 1)
 		{
 			printf("For encoding, need to specify exactly one performance-quality\n"
 				   "trade-off preset option. The available presets are:\n" " -veryfast\n" " -fast\n" " -medium\n" " -thorough\n" " -exhaustive\n");
 			exit(1);
 		}

 		progress_counter_divider = pcdiv;

 		int partitions_to_test = plimit_set_by_user ? plimit_user_specified : plimit_autoset;
 		float dblimit_2d = dblimit_set_by_user ? dblimit_user_specified : dblimit_autoset_2d;
 		float dblimit_3d = dblimit_set_by_user ? dblimit_user_specified : dblimit_autoset_3d;
 		float oplimit = oplimit_set_by_user ? oplimit_user_specified : oplimit_autoset;
 		float mincorrel = mincorrel_set_by_user ? mincorrel_user_specified : mincorrel_autoset;

 		int maxiters = maxiters_set_by_user ? maxiters_user_specified : maxiters_autoset;
 		ewp.max_refinement_iters = maxiters;

 		ewp.block_mode_cutoff = (bmc_set_by_user ? bmc_user_specified : bmc_autoset) / 100.0f;

 		if (rgb_force_use_of_hdr == 0)
 		{
 			texel_avg_error_limit_2d = pow(0.1f, dblimit_2d * 0.1f) * 65535.0f * 65535.0f;
 			texel_avg_error_limit_3d = pow(0.1f, dblimit_3d * 0.1f) * 65535.0f * 65535.0f;
 		}
 		else
 		{
 			texel_avg_error_limit_2d = 0.0f;
 			texel_avg_error_limit_3d = 0.0f;
 		}

 		ewp.partition_1_to_2_limit = oplimit;
 		ewp.lowest_correlation_cutoff = mincorrel;

 		if (partitions_to_test < 1)
 			partitions_to_test = 1;
 		else if (partitions_to_test > PARTITION_COUNT)
 			partitions_to_test = PARTITION_COUNT;

 		ewp.partition_search_limit = partitions_to_test;

 		// if diagnostics are run, force the thread count to 1.
 		if (
 			#ifdef DEBUG_PRINT_DIAGNOSTICS
 			   diagnostics_tile >= 0 ||
 			#endif
 			   print_tile_errors > 0 || print_statistics > 0)
 		{
 			thread_count = 1;
 			thread_count_autodetected = 0;
 		}

 		if (thread_count < 1)
 		{
 			thread_count = get_number_of_cpus();
 			thread_count_autodetected = 1;
 		}


 		// Specifying the error weight of a color component as 0 is not allowed.
 		// Each weight is raised to at least 1/1000 of the largest component weight,
 		// so a zero weight becomes a small positive value (unless all four are 0).

 		float max_color_component_weight = MAX(MAX(ewp.rgba_weights[0], ewp.rgba_weights[1]),
 											   MAX(ewp.rgba_weights[2], ewp.rgba_weights[3]));
 		ewp.rgba_weights[0] = MAX(ewp.rgba_weights[0], max_color_component_weight / 1000.0f);
 		ewp.rgba_weights[1] = MAX(ewp.rgba_weights[1], max_color_component_weight / 1000.0f);
 		ewp.rgba_weights[2] = MAX(ewp.rgba_weights[2], max_color_component_weight / 1000.0f);
 		ewp.rgba_weights[3] = MAX(ewp.rgba_weights[3], max_color_component_weight / 1000.0f);


 		// print all encoding settings unless specifically told otherwise.
 		if (!silentmode)
 		{
 			printf("Encoding settings:\n\n");
 			if (target_bitrate_set)
 				printf("Target bitrate provided: %.2f bpp\n", target_bitrate);
 			printf("2D Block size: %dx%d (%.2f bpp)\n", xdim_2d, ydim_2d, 128.0 / (xdim_2d * ydim_2d));
 			printf("3D Block size: %dx%dx%d (%.2f bpp)\n", xdim_3d, ydim_3d, zdim_3d, 128.0 / (xdim_3d * ydim_3d * zdim_3d));
 			printf("Radius for mean-and-stdev calculations: %d texels\n", ewp.mean_stdev_radius);
 			printf("RGB power: %g\n", ewp.rgb_power);
 			printf("RGB base-weight: %g\n", ewp.rgb_base_weight);
 			printf("RGB local-mean weight: %g\n", ewp.rgb_mean_weight);
 			printf("RGB local-stdev weight: %g\n", ewp.rgb_stdev_weight);
 			printf("RGB mean-and-stdev mixing across color channels: %g\n", ewp.rgb_mean_and_stdev_mixing);
 			printf("Alpha power: %g\n", ewp.alpha_power);
 			printf("Alpha base-weight: %g\n", ewp.alpha_base_weight);
 			printf("Alpha local-mean weight: %g\n", ewp.alpha_mean_weight);
 			printf("Alpha local-stdev weight: %g\n", ewp.alpha_stdev_weight);
 			printf("RGB weights scale with alpha: ");
 			if (ewp.enable_rgb_scale_with_alpha)
 				printf("enabled (radius=%d)\n", ewp.alpha_radius);
 			else
 				printf("disabled\n");
 			printf("Color channel relative weighting: R=%g G=%g B=%g A=%g\n", ewp.rgba_weights[0], ewp.rgba_weights[1], ewp.rgba_weights[2], ewp.rgba_weights[3]);
 			printf("Block-artifact suppression parameter : %g\n", ewp.block_artifact_suppression);
 			printf("Number of distinct partitionings to test: %d (%s)\n", ewp.partition_search_limit, plimit_set_by_user ? "specified by user" : "preset");
 			printf("PSNR decibel limit: 2D: %f 3D: %f (%s)\n", dblimit_2d, dblimit_3d, dblimit_set_by_user ? "specified by user" : "preset");
 			printf("1->2 partition limit: %f\n", oplimit);
 			printf("Dual-plane color-correlation cutoff: %f (%s)\n", mincorrel, mincorrel_set_by_user ? "specified by user" : "preset");
 			printf("Block Mode Percentile Cutoff: %f (%s)\n", ewp.block_mode_cutoff * 100.0f, bmc_set_by_user ? "specified by user" : "preset");
 			printf("Max refinement iterations: %d (%s)\n", ewp.max_refinement_iters, maxiters_set_by_user ? "specified by user" : "preset");
 			printf("Thread count : %d (%s)\n", thread_count, thread_count_autodetected ? "autodetected" : "specified by user");
 			printf("\n");
 		}
 	}

 	int padding = MAX(ewp.mean_stdev_radius, ewp.alpha_radius);

 	// determine the output image bitness as follows:
 	// if enforced by the output format, follow the output format's result
 	// else use decode_mode to pick bitness: 16 for DECODE_HDR, 8 otherwise.
 	int out_bitness = get_output_filename_enforced_bitness(output_filename);
 	if (out_bitness == -1)
 	{
 		out_bitness = (decode_mode == DECODE_HDR) ? 16 : 8;
 	}

 	int xdim = -1;
 	int ydim = -1;
 	int zdim = -1;

 	// Temporary image array (for merging multiple 2D images into one 3D image).
 	int *load_results = NULL;
 	astc_codec_image **input_images = NULL;

 	int load_result = 0;
 	astc_codec_image *input_image = NULL;
 	astc_codec_image *output_image = NULL;
 	int input_components = 0;

 	int input_image_is_hdr = 0;

 	// load image
 	if (opmode == 0 || opmode == 2 || opmode == 3)
 	{
 		// Allocate arrays for image data and load results.
 		load_results = new int[array_size];
 		input_images = new astc_codec_image *[array_size];

 		// Iterate over all input images.
 		for (int image_index = 0; image_index < array_size; image_index++)
 		{
 			// 2D input data.
 			if (array_size == 1)
 			{
 				input_images[image_index] = astc_codec_load_image(input_filename, padding, &load_results[image_index]);
 			}
 			// 3D input data - multiple 2D images.
 			else
 			{
 				char new_input_filename[256];

 				// Check for extension: <name>.<extension>
 				if (NULL == strrchr(input_filename, '.'))
 				{
 					printf("Unable to determine file type from extension: %s\n", input_filename);
 					exit(1);
 				}

 				// Construct new file name and load: <name>_N.<extension>
 				strcpy(new_input_filename, input_filename);
 				sprintf(strrchr(new_input_filename, '.'), "_%d%s", image_index, strrchr(input_filename, '.'));
 				input_images[image_index] = astc_codec_load_image(new_input_filename, padding, &load_results[image_index]);

 				// Check image is not 3D.
 				if (input_images[image_index]->zsize != 1)
 				{
 					printf("3D source images not supported with -array option: %s\n", new_input_filename);
 					exit(1);
 				}
 			}

 			// Check load result.
 			if (load_results[image_index] < 0)
 			{
 				printf("Failed to load image %s\n", input_filename);
 				exit(1);
 			}

 			// Check format matches other slices.
 			if (load_results[image_index] != load_results[0])
 			{
 				printf("Mismatching image format - image 0 and %d are a different format\n", image_index);
 				exit(1);
 			}
 		}

 		load_result = load_results[0];

 		// Assign input image.
 		if (array_size == 1)
 		{
 			input_image = input_images[0];
 		}
 		// Merge input image data.
 		else
 		{
 			int z, xsize, ysize, zsize, bitness, slice_size;

 			xsize = input_images[0]->xsize;
 			ysize = input_images[0]->ysize;
 			zsize = array_size;
 			bitness = (load_result & 0x80) ? 16 : 8;
 			slice_size = (xsize + (2 * padding)) * (ysize + (2 * padding));

 			// Allocate image memory.
 			input_image = allocate_image(bitness, xsize, ysize, zsize, padding);

 			// Combine 2D source images into one 3D image (skip padding slices as these don't exist in 2D textures).
 			for (z = padding; z < zsize + padding; z++)
 			{
 				if (bitness == 8)
 				{
 					memcpy(*input_image->imagedata8[z], *input_images[z - padding]->imagedata8[0], slice_size * 4 * sizeof(uint8_t));
 				}
 				else
 				{
 					memcpy(*input_image->imagedata16[z], *input_images[z - padding]->imagedata16[0], slice_size * 4 * sizeof(uint16_t));
 				}
 			}

 			// Clean up temporary images.
 			for (int i = 0; i < array_size; i++)
 			{
 				destroy_image(input_images[i]);
 			}

 			// Clamp texels outside the actual image area.
 			fill_image_padding_area(input_image);
 		}

 		delete[] input_images;
 		input_images = NULL;

 		delete[] load_results;
 		load_results = NULL;

 		input_components = load_result & 7;
 		input_image_is_hdr = (load_result & 0x80) ? 1 : 0;

 		if (input_image->zsize > 1)
 		{
 			xdim = xdim_3d;
 			ydim = ydim_3d;
 			zdim = zdim_3d;
 			ewp.texel_avg_error_limit = texel_avg_error_limit_3d;
 		}
 		else
 		{
 			xdim = xdim_2d;
 			ydim = ydim_2d;
 			zdim = 1;
 			ewp.texel_avg_error_limit = texel_avg_error_limit_2d;
 		}

 		expand_block_artifact_suppression(xdim, ydim, zdim, &ewp);

 		if (!silentmode)
 		{
 			printf("%s: %dD %s image, %d x %d x %d, %d components\n\n",
 				   input_filename, input_image->zsize > 1 ? 3 : 2, input_image_is_hdr ? "HDR" : "LDR", input_image->xsize, input_image->ysize, input_image->zsize, load_result & 7);
 		}

 		if (padding > 0 || ewp.rgb_mean_weight != 0.0f || ewp.rgb_stdev_weight != 0.0f || ewp.alpha_mean_weight != 0.0f || ewp.alpha_stdev_weight != 0.0f)
 		{
 			if (!silentmode)
 			{
 				printf("Computing texel-neighborhood means and variances ... ");
 				fflush(stdout);
 			}
 			compute_averages_and_variances(input_image, ewp.rgb_power, ewp.alpha_power, ewp.mean_stdev_radius, ewp.alpha_radius, swz_encode);
 			if (!silentmode)
 			{
 				printf("done\n");
 				fflush(stdout);
 			}
 		}
 	}

 	start_coding_time = get_time();

 	if (opmode == 1)
 		output_image = load_astc_file(input_filename, out_bitness, decode_mode, swz_decode);

 	double image_size = 0.0f;
 	if (input_image)
 	{
 		image_size = input_image->xsize * input_image->ysize * input_image->zsize;
 	}
 	else
 	{
 		image_size = output_image->xsize * output_image->ysize * output_image->zsize;
 	}

 	// process image, if relevant
 	if (opmode == 2)
 		output_image = pack_and_unpack_astc_image(input_image, xdim, ydim, zdim, &ewp, decode_mode, swz_encode, swz_decode, out_bitness, thread_count);

 	end_coding_time = get_time();

 	// print PSNR after a pack-and-unpack run (opmode 2), only when psnrmode is 1
 	if (opmode == 2)
 	{
 		if (psnrmode == 1)
 		{
 			compute_error_metrics(input_image_is_hdr, input_components, input_image, output_image, low_fstop, high_fstop, psnrmode);
 		}
 	}

 	// store image
 	if (opmode == 1 || opmode == 2)
 	{
 		int store_result = -1;
 		const char *format_string = "";

 		store_result = astc_codec_store_image(output_image, output_filename, out_bitness, &format_string);

 		if (store_result < 0)
 		{
 			printf("Failed to store image %s\n", output_filename);
 			exit(1);
 		}
 		else
 		{
 			if (!silentmode)
 			{
 				printf("Stored %s image %s with %d color channels\n", format_string, output_filename, store_result);
 			}
 		}
 	}

 	if (opmode == 0)
 	{
 		store_astc_file(input_image, output_filename, xdim, ydim, zdim, &ewp, decode_mode, swz_encode, thread_count);
 	}

 	destroy_image(input_image);

 	if (print_block_mode_histogram)
 	{
 		printf("%s ", argv[2]);
 		printf("%d %d  ", xdim_2d, ydim_2d);
 		for (int i = 0; i < 2048; i++)
 		{
 			printf(" %d", block_mode_histogram[i]);
 		}
 		printf("\n");
 	}

 	end_time = get_time();

 	if (timemode)
 	{
 		double tex_rate = image_size / (end_coding_time - start_coding_time);
 		tex_rate = tex_rate / 1000000.0;

 		printf("\nPerformance metrics\n");
 		printf("===================\n\n");
 		printf("    Total time:                %8.4f s\n", end_time - start_time);
 		printf("    Coding time:               %8.4f s\n", end_coding_time - start_coding_time);
 		printf("    Coding rate:               %8.4f MT/s\n", tex_rate);
 	}

 	return 0;
 }