Source/astcenc_mathlib.h - external/github.com/ARM-software/astc-encoder - Git at Google

 // SPDX-License-Identifier: Apache-2.0
 // ----------------------------------------------------------------------------
 // Copyright 2011-2026 Arm Limited
 //
 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 // use this file except in compliance with the License. You may obtain a copy
 // of the License at:
 //
 //     http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing, software
 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 // License for the specific language governing permissions and limitations
 // under the License.
 // ----------------------------------------------------------------------------

 /*
  * This module implements a variety of mathematical data types and library
  * functions used by the codec.
  */

 #ifndef ASTC_MATHLIB_H_INCLUDED
 #define ASTC_MATHLIB_H_INCLUDED

 #include <cassert>
 #include <cstdint>
 #include <cmath>
 #include <cstring>

 #ifndef ASTCENC_POPCNT
   #if defined(__POPCNT__)
     #define ASTCENC_POPCNT 1
   #else
     #define ASTCENC_POPCNT 0
   #endif
 #endif

 #ifndef ASTCENC_F16C
   #if defined(__F16C__)
     #define ASTCENC_F16C 1
   #else
     #define ASTCENC_F16C 0
   #endif
 #endif

 #ifndef ASTCENC_SSE
   #if defined(__SSE4_2__)
     #define ASTCENC_SSE 42
   #elif defined(__SSE4_1__)
     #define ASTCENC_SSE 41
   #elif defined(__SSE2__) || (defined(_M_AMD64) && !defined(_M_ARM64EC))
     #define ASTCENC_SSE 20
   #else
     #define ASTCENC_SSE 0
   #endif
 #endif

 #ifndef ASTCENC_AVX
   #if defined(__AVX2__)
     #define ASTCENC_AVX 2
     #define ASTCENC_X86_GATHERS 1
   #elif defined(__AVX__)
     #define ASTCENC_AVX 1
     #define ASTCENC_X86_GATHERS 1
   #else
     #define ASTCENC_AVX 0
   #endif
 #endif

 #ifndef ASTCENC_NEON
   #if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
     #define ASTCENC_NEON 1
   #else
     #define ASTCENC_NEON 0
   #endif
 #endif

 #ifndef ASTCENC_SVE
   #if defined(__ARM_FEATURE_SVE)
     #if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
       #define ASTCENC_SVE 8
     // Auto-detected SVE can only assume vector width of 4 is available, but
     // must also allow for hardware being longer and so all use of intrinsics
     // must explicitly use predicate masks to limit to 4-wide.
     #else
       #define ASTCENC_SVE 4
     #endif
     #else
     #define ASTCENC_SVE 0
   #endif
 #endif

 // Force vector-sized SIMD alignment
 #if ASTCENC_AVX || ASTCENC_SVE == 8
   #define ASTCENC_VECALIGN 32
 #elif ASTCENC_SSE || ASTCENC_NEON || ASTCENC_SVE == 4
   #define ASTCENC_VECALIGN 16
 // Use default alignment for non-SIMD builds
 #else
   #define ASTCENC_VECALIGN 0
 #endif

 // C++11 states that alignas(0) should be ignored but GCC doesn't do
 // this on some versions, so workaround and avoid emitting alignas(0)
 #if ASTCENC_VECALIGN > 0
 	#define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
 #else
 	#define ASTCENC_ALIGNAS
 #endif

 #if ASTCENC_SSE != 0 || ASTCENC_AVX != 0 || ASTCENC_POPCNT != 0
 	#include <immintrin.h>
 #endif

 /* ============================================================================
   Fast math library; note that many of the higher-order functions in this set
   use approximations which are less accurate, but faster, than <cmath> standard
   library equivalents.

   Note: Many of these are not necessarily faster than simple C versions when
   used on a single scalar value, but are included for testing purposes as most
   have an option based on SSE intrinsics and therefore provide an obvious route
   to future vectorization.
 ============================================================================ */

 // These are namespaced to avoid colliding with C standard library functions.
 namespace astc
 {

 static const float PI          = 3.14159265358979323846f;
 static const float PI_OVER_TWO = 1.57079632679489661923f;

 /**
  * @brief SP float absolute value.
  *
  * @param v   The value to make absolute.
  *
  * @return The absolute value.
  */
 static inline float fabs(float v)
 {
 	return std::fabs(v);
 }

 /**
  * @brief Test if a float value is a nan.
  *
  * @param v    The value test.
  *
  * @return Zero is not a NaN, non-zero otherwise.
  */
 static inline bool isnan(float v)
 {
 	return v != v;
 }

 /**
  * @brief Return the minimum of two values.
  *
  * For floats, NaNs are turned into @c q.
  *
  * @param p   The first value to compare.
  * @param q   The second value to compare.
  *
  * @return The smallest value.
  */
 template<typename T>
 static inline T min(T p, T q)
 {
 	return p < q ? p : q;
 }

 /**
  * @brief Return the minimum of three values.
  *
  * For floats, NaNs are turned into @c r.
  *
  * @param p   The first value to compare.
  * @param q   The second value to compare.
  * @param r   The third value to compare.
  *
  * @return The smallest value.
  */
 template<typename T>
 static inline T min(T p, T q, T r)
 {
 	return min(min(p, q), r);
 }

 /**
  * @brief Return the minimum of four values.
  *
  * For floats, NaNs are turned into @c s.
  *
  * @param p   The first value to compare.
  * @param q   The second value to compare.
  * @param r   The third value to compare.
  * @param s   The fourth value to compare.
  *
  * @return The smallest value.
  */
 template<typename T>
 static inline T min(T p, T q, T r, T s)
 {
 	return min(min(p, q), min(r, s));
 }

 /**
  * @brief Return the maximum of two values.
  *
  * For floats, NaNs are turned into @c q.
  *
  * @param p   The first value to compare.
  * @param q   The second value to compare.
  *
  * @return The largest value.
  */
 template<typename T>
 static inline T max(T p, T q)
 {
 	return p > q ? p : q;
 }

 /**
  * @brief Return the maximum of three values.
  *
  * For floats, NaNs are turned into @c r.
  *
  * @param p   The first value to compare.
  * @param q   The second value to compare.
  * @param r   The third value to compare.
  *
  * @return The largest value.
  */
 template<typename T>
 static inline T max(T p, T q, T r)
 {
 	return max(max(p, q), r);
 }

 /**
  * @brief Return the maximum of four values.
  *
  * For floats, NaNs are turned into @c s.
  *
  * @param p   The first value to compare.
  * @param q   The second value to compare.
  * @param r   The third value to compare.
  * @param s   The fourth value to compare.
  *
  * @return The largest value.
  */
 template<typename T>
 static inline T max(T p, T q, T r, T s)
 {
 	return max(max(p, q), max(r, s));
 }

 /**
  * @brief Clamp a value between @c mn and @c mx.
  *
  * For floats, NaNs are turned into @c mn.
  *
  * @param v      The value to clamp.
  * @param mn     The min value (inclusive).
  * @param mx     The max value (inclusive).
  *
  * @return The clamped value.
  */
 template<typename T>
 inline T clamp(T v, T mn, T mx)
 {
 	// Do not reorder; correct NaN handling relies on the fact that comparison
 	// with NaN returns false and will fall-through to the "min" value.
 	if (v > mx) return mx;
 	if (v > mn) return v;
 	return mn;
 }

 /**
  * @brief Clamp a float value between 0.0f and 1.0f.
  *
  * NaNs are turned into 0.0f.
  *
  * @param v   The value to clamp.
  *
  * @return The clamped value.
  */
 static inline float clamp1f(float v)
 {
 	return astc::clamp(v, 0.0f, 1.0f);
 }

 /**
  * @brief Clamp a float value between 0.0f and 255.0f.
  *
  * NaNs are turned into 0.0f.
  *
  * @param v  The value to clamp.
  *
  * @return The clamped value.
  */
 static inline float clamp255f(float v)
 {
 	return astc::clamp(v, 0.0f, 255.0f);
 }

 /**
  * @brief SP float round-down.
  *
  * @param v   The value to round.
  *
  * @return The rounded value.
  */
 static inline float flt_rd(float v)
 {
 	return std::floor(v);
 }

 /**
  * @brief SP float round-to-nearest and convert to integer.
  *
  * @param v   The value to round.
  *
  * @return The rounded value.
  */
 static inline int flt2int_rtn(float v)
 {

 	return static_cast<int>(v + 0.5f);
 }

 /**
  * @brief SP float round down and convert to integer.
  *
  * @param v   The value to round.
  *
  * @return The rounded value.
  */
 static inline int flt2int_rd(float v)
 {
 	return static_cast<int>(v);
 }

 /**
  * @brief SP float bit-interpreted as an integer.
  *
  * @param v   The value to bitcast.
  *
  * @return The converted value.
  */
 static inline int float_as_int(float v)
 {
 	// Future: Can use std:bit_cast with C++20
 	int iv;
 	std::memcpy(&iv, &v, sizeof(float));
 	return iv;
 }

 /**
  * @brief Integer bit-interpreted as an SP float.
  *
  * @param v   The value to bitcast.
  *
  * @return The converted value.
  */
 static inline float int_as_float(int v)
 {
 	// Future: Can use std:bit_cast with C++20
 	float fv;
 	std::memcpy(&fv, &v, sizeof(int));
 	return fv;
 }

 /**
  * @brief SP float bit-interpreted as an unsigned integer.
  *
  * @param v   The value to bitcast.
  *
  * @return The converted value.
  */
 static inline unsigned int float_as_uint(float v)
 {
 	// Future: Can use std:bit_cast with C++20
 	unsigned int iv;
 	std::memcpy(&iv, &v, sizeof(float));
 	return iv;
 }

 /**
  * @brief Unsigned integer bit-interpreted as an SP float.
  *
  * @param v   The value to bitcast.
  *
  * @return The converted value.
  */
 static inline float uint_as_float(unsigned int v)
 {
 	// Future: Can use std:bit_cast with C++20
 	float fv;
 	std::memcpy(&fv, &v, sizeof(unsigned int));
 	return fv;
 }

 /**
  * @brief Signed int bit-interpreted as an unsigned integer.
  *
  * @param v   The value to bitcast.
  *
  * @return The converted value.
  */
 static inline unsigned int int_as_uint(int v)
 {
 	// Future: Can use std:bit_cast with C++20
 	unsigned int uv;
 	std::memcpy(&uv, &v, sizeof(int));
 	return uv;
 }

 /**
  * @brief Unsigned integer bit-interpreted as a signed integer.
  *
  * @param v   The value to bitcast.
  *
  * @return The converted value.
  */
 static inline int uint_as_int(unsigned int v)
 {
 	// Future: Can use std:bit_cast with C++20
 	int sv;
 	std::memcpy(&sv, &v, sizeof(unsigned int));
 	return sv;
 }

 /**
  * @brief Fast approximation of 1.0 / sqrt(val).
  *
  * @param v   The input value.
  *
  * @return The approximated result.
  */
 static inline float rsqrt(float v)
 {
 	return 1.0f / std::sqrt(v);
 }

 /**
  * @brief Fast approximation of sqrt(val).
  *
  * @param v   The input value.
  *
  * @return The approximated result.
  */
 static inline float sqrt(float v)
 {
 	return std::sqrt(v);
 }

 /**
  * @brief Extract mantissa and exponent of a float value.
  *
  * @param      v      The input value.
  * @param[out] expo   The output exponent.
  *
  * @return The mantissa.
  */
 static inline float frexp(float v, int* expo)
 {
 	unsigned int iv = astc::float_as_uint(v);
 	*expo = ((iv >> 23) & 0xFF) - 126;
 	iv = (iv & 0x807fffff) | 0x3f000000;
 	return astc::uint_as_float(iv);
 }

 /**
  * @brief Compute the product of two sizes.
  *
  * This function is implemented to indicate if overflow has occurred, which may
  * occur when input values are not trusted. Implementation is obviously slower
  * than one that does not do this, so don't use for values we know cannot
  * overflow.
  *
  * Overflow signaling is sticky, so calling code can check at the end of a
  * sequence of multiplies.
  *
  * @param         val_a      The first value to multiply.
  * @param         val_b      The second value to multiply.
  * @param[in,out] overflow   Did previous or this calculation overflow?
  *
  * @return The multiplication result, which may have overflowed.
  */
 static inline size_t mul_safe(
 	size_t val_a,
 	size_t val_b,
 	bool& overflow
 ) {
 	size_t result = val_a * val_b;
 	overflow = overflow || ((val_b != 0) && ((result / val_b) != val_a));
 	return result;
 }

 /**
  * @brief Get the number of blocks along a single axis.
  *
  * This function is implemented so that intermediate values will not overflow,
  * which may occur when input values are not trusted. Implementation is
  * obviously slower than one that does not do this, so don't use for values
  * we know cannot overflow.
  *
  * @param dim_axis    The axis dimension, in pixels.
  * @param dim_block   The block dimension, in pixels.
  *
  * @return The number of blocks needed in this dimension.
  */
 static inline size_t get_block_count_safe(
 	size_t dim_axis,
 	size_t dim_block
 ) {
 	// Compute number of whole blocks
 	size_t blocks = dim_axis / dim_block;

 	// Add in any residual partial block
 	if (dim_axis != (dim_block * blocks))
 	{
 		blocks++;
 	}

 	return blocks;
 }

 /**
  * @brief Initialize the seed structure for a random number generator.
  *
  * Important note: For the purposes of ASTC we want sets of random numbers to
  * use the codec, but we want the same seed value across instances and threads
  * to ensure that image output is stable across compressor runs and across
  * platforms. Every PRNG created by this call will therefore return the same
  * sequence of values ...
  *
  * @param state The state structure to initialize.
  */
 void rand_init(uint64_t state[2]);

 /**
  * @brief Return the next random number from the generator.
  *
  * This RNG is an implementation of the "xoroshiro-128+ 1.0" PRNG, based on the
  * public-domain implementation given by David Blackman & Sebastiano Vigna at
  * http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
  *
  * @param state The state structure to use/update.
  */
 uint64_t rand(uint64_t state[2]);

 }

 /* ============================================================================
   Softfloat library with fp32 and fp16 conversion functionality.
 ============================================================================ */
 #if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
 	/* narrowing float->float conversions */
 	uint16_t float_to_sf16(float val);
 	float sf16_to_float(uint16_t val);
 #endif

 /*********************************
   Vector library
 *********************************/
 #include "astcenc_vecmathlib.h"

 /*********************************
   Declaration of line types
 *********************************/
 // parametric line, 2D: The line is given by line = a + b * t.

 struct line2
 {
 	vfloat4 a;
 	vfloat4 b;
 };

 // parametric line, 3D
 struct line3
 {
 	vfloat4 a;
 	vfloat4 b;
 };

 struct line4
 {
 	vfloat4 a;
 	vfloat4 b;
 };


 struct processed_line2
 {
 	vfloat4 amod;
 	vfloat4 bs;
 };

 struct processed_line3
 {
 	vfloat4 amod;
 	vfloat4 bs;
 };

 struct processed_line4
 {
 	vfloat4 amod;
 	vfloat4 bs;
 };

 #endif
	// SPDX-License-Identifier: Apache-2.0
	// ----------------------------------------------------------------------------
	// Copyright 2011-2026 Arm Limited
	//
	// Licensed under the Apache License, Version 2.0 (the "License"); you may not
	// use this file except in compliance with the License. You may obtain a copy
	// of the License at:
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing, software
	// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
	// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
	// License for the specific language governing permissions and limitations
	// under the License.
	// ----------------------------------------------------------------------------

	/*
	* This module implements a variety of mathematical data types and library
	* functions used by the codec.
	*/

	#ifndef ASTC_MATHLIB_H_INCLUDED
	#define ASTC_MATHLIB_H_INCLUDED

	#include <cassert>
	#include <cstdint>
	#include <cmath>
	#include <cstring>

	#ifndef ASTCENC_POPCNT
	#if defined(__POPCNT__)
	#define ASTCENC_POPCNT 1
	#else
	#define ASTCENC_POPCNT 0
	#endif
	#endif

	#ifndef ASTCENC_F16C
	#if defined(__F16C__)
	#define ASTCENC_F16C 1
	#else
	#define ASTCENC_F16C 0
	#endif
	#endif

	#ifndef ASTCENC_SSE
	#if defined(__SSE4_2__)
	#define ASTCENC_SSE 42
	#elif defined(__SSE4_1__)
	#define ASTCENC_SSE 41
	#elif defined(__SSE2__) \|\| (defined(_M_AMD64) && !defined(_M_ARM64EC))
	#define ASTCENC_SSE 20
	#else
	#define ASTCENC_SSE 0
	#endif
	#endif

	#ifndef ASTCENC_AVX
	#if defined(__AVX2__)
	#define ASTCENC_AVX 2
	#define ASTCENC_X86_GATHERS 1
	#elif defined(__AVX__)
	#define ASTCENC_AVX 1
	#define ASTCENC_X86_GATHERS 1
	#else
	#define ASTCENC_AVX 0
	#endif
	#endif

	#ifndef ASTCENC_NEON
	#if defined(__aarch64__) \|\| defined(_M_ARM64) \|\| defined(_M_ARM64EC)
	#define ASTCENC_NEON 1
	#else
	#define ASTCENC_NEON 0
	#endif
	#endif

	#ifndef ASTCENC_SVE
	#if defined(__ARM_FEATURE_SVE)
	#if defined(__ARM_FEATURE_SVE_BITS) && __ARM_FEATURE_SVE_BITS == 256
	#define ASTCENC_SVE 8
	// Auto-detected SVE can only assume vector width of 4 is available, but
	// must also allow for hardware being longer and so all use of intrinsics
	// must explicitly use predicate masks to limit to 4-wide.
	#else
	#define ASTCENC_SVE 4
	#endif
	#else
	#define ASTCENC_SVE 0
	#endif
	#endif

	// Force vector-sized SIMD alignment
	#if ASTCENC_AVX \|\| ASTCENC_SVE == 8
	#define ASTCENC_VECALIGN 32
	#elif ASTCENC_SSE \|\| ASTCENC_NEON \|\| ASTCENC_SVE == 4
	#define ASTCENC_VECALIGN 16
	// Use default alignment for non-SIMD builds
	#else
	#define ASTCENC_VECALIGN 0
	#endif

	// C++11 states that alignas(0) should be ignored but GCC doesn't do
	// this on some versions, so workaround and avoid emitting alignas(0)
	#if ASTCENC_VECALIGN > 0
	#define ASTCENC_ALIGNAS alignas(ASTCENC_VECALIGN)
	#else
	#define ASTCENC_ALIGNAS
	#endif

	#if ASTCENC_SSE != 0 \|\| ASTCENC_AVX != 0 \|\| ASTCENC_POPCNT != 0
	#include <immintrin.h>
	#endif

	/* ============================================================================
	Fast math library; note that many of the higher-order functions in this set
	use approximations which are less accurate, but faster, than <cmath> standard
	library equivalents.

	Note: Many of these are not necessarily faster than simple C versions when
	used on a single scalar value, but are included for testing purposes as most
	have an option based on SSE intrinsics and therefore provide an obvious route
	to future vectorization.
	============================================================================ */

	// These are namespaced to avoid colliding with C standard library functions.
	namespace astc
	{

	static const float PI = 3.14159265358979323846f;
	static const float PI_OVER_TWO = 1.57079632679489661923f;

	/**
	* @brief SP float absolute value.
	*
	* @param v The value to make absolute.
	*
	* @return The absolute value.
	*/
	static inline float fabs(float v)
	{
	return std::fabs(v);
	}

	/**
	* @brief Test if a float value is a nan.
	*
	* @param v The value test.
	*
	* @return Zero is not a NaN, non-zero otherwise.
	*/
	static inline bool isnan(float v)
	{
	return v != v;
	}

	/**
	* @brief Return the minimum of two values.
	*
	* For floats, NaNs are turned into @c q.
	*
	* @param p The first value to compare.
	* @param q The second value to compare.
	*
	* @return The smallest value.
	*/
	template<typename T>
	static inline T min(T p, T q)
	{
	return p < q ? p : q;
	}

	/**
	* @brief Return the minimum of three values.
	*
	* For floats, NaNs are turned into @c r.
	*
	* @param p The first value to compare.
	* @param q The second value to compare.
	* @param r The third value to compare.
	*
	* @return The smallest value.
	*/
	template<typename T>
	static inline T min(T p, T q, T r)
	{
	return min(min(p, q), r);
	}

	/**
	* @brief Return the minimum of four values.
	*
	* For floats, NaNs are turned into @c s.
	*
	* @param p The first value to compare.
	* @param q The second value to compare.
	* @param r The third value to compare.
	* @param s The fourth value to compare.
	*
	* @return The smallest value.
	*/
	template<typename T>
	static inline T min(T p, T q, T r, T s)
	{
	return min(min(p, q), min(r, s));
	}

	/**
	* @brief Return the maximum of two values.
	*
	* For floats, NaNs are turned into @c q.
	*
	* @param p The first value to compare.
	* @param q The second value to compare.
	*
	* @return The largest value.
	*/
	template<typename T>
	static inline T max(T p, T q)
	{
	return p > q ? p : q;
	}

	/**
	* @brief Return the maximum of three values.
	*
	* For floats, NaNs are turned into @c r.
	*
	* @param p The first value to compare.
	* @param q The second value to compare.
	* @param r The third value to compare.
	*
	* @return The largest value.
	*/
	template<typename T>
	static inline T max(T p, T q, T r)
	{
	return max(max(p, q), r);
	}

	/**
	* @brief Return the maximum of four values.
	*
	* For floats, NaNs are turned into @c s.
	*
	* @param p The first value to compare.
	* @param q The second value to compare.
	* @param r The third value to compare.
	* @param s The fourth value to compare.
	*
	* @return The largest value.
	*/
	template<typename T>
	static inline T max(T p, T q, T r, T s)
	{
	return max(max(p, q), max(r, s));
	}

	/**
	* @brief Clamp a value between @c mn and @c mx.
	*
	* For floats, NaNs are turned into @c mn.
	*
	* @param v The value to clamp.
	* @param mn The min value (inclusive).
	* @param mx The max value (inclusive).
	*
	* @return The clamped value.
	*/
	template<typename T>
	inline T clamp(T v, T mn, T mx)
	{
	// Do not reorder; correct NaN handling relies on the fact that comparison
	// with NaN returns false and will fall-through to the "min" value.
	if (v > mx) return mx;
	if (v > mn) return v;
	return mn;
	}

	/**
	* @brief Clamp a float value between 0.0f and 1.0f.
	*
	* NaNs are turned into 0.0f.
	*
	* @param v The value to clamp.
	*
	* @return The clamped value.
	*/
	static inline float clamp1f(float v)
	{
	return astc::clamp(v, 0.0f, 1.0f);
	}

	/**
	* @brief Clamp a float value between 0.0f and 255.0f.
	*
	* NaNs are turned into 0.0f.
	*
	* @param v The value to clamp.
	*
	* @return The clamped value.
	*/
	static inline float clamp255f(float v)
	{
	return astc::clamp(v, 0.0f, 255.0f);
	}

	/**
	* @brief SP float round-down.
	*
	* @param v The value to round.
	*
	* @return The rounded value.
	*/
	static inline float flt_rd(float v)
	{
	return std::floor(v);
	}

	/**
	* @brief SP float round-to-nearest and convert to integer.
	*
	* @param v The value to round.
	*
	* @return The rounded value.
	*/
	static inline int flt2int_rtn(float v)
	{

	return static_cast<int>(v + 0.5f);
	}

	/**
	* @brief SP float round down and convert to integer.
	*
	* @param v The value to round.
	*
	* @return The rounded value.
	*/
	static inline int flt2int_rd(float v)
	{
	return static_cast<int>(v);
	}

	/**
	* @brief SP float bit-interpreted as an integer.
	*
	* @param v The value to bitcast.
	*
	* @return The converted value.
	*/
	static inline int float_as_int(float v)
	{
	// Future: Can use std:bit_cast with C++20
	int iv;
	std::memcpy(&iv, &v, sizeof(float));
	return iv;
	}

	/**
	* @brief Integer bit-interpreted as an SP float.
	*
	* @param v The value to bitcast.
	*
	* @return The converted value.
	*/
	static inline float int_as_float(int v)
	{
	// Future: Can use std:bit_cast with C++20
	float fv;
	std::memcpy(&fv, &v, sizeof(int));
	return fv;
	}

	/**
	* @brief SP float bit-interpreted as an unsigned integer.
	*
	* @param v The value to bitcast.
	*
	* @return The converted value.
	*/
	static inline unsigned int float_as_uint(float v)
	{
	// Future: Can use std:bit_cast with C++20
	unsigned int iv;
	std::memcpy(&iv, &v, sizeof(float));
	return iv;
	}

	/**
	* @brief Unsigned integer bit-interpreted as an SP float.
	*
	* @param v The value to bitcast.
	*
	* @return The converted value.
	*/
	static inline float uint_as_float(unsigned int v)
	{
	// Future: Can use std:bit_cast with C++20
	float fv;
	std::memcpy(&fv, &v, sizeof(unsigned int));
	return fv;
	}

	/**
	* @brief Signed int bit-interpreted as an unsigned integer.
	*
	* @param v The value to bitcast.
	*
	* @return The converted value.
	*/
	static inline unsigned int int_as_uint(int v)
	{
	// Future: Can use std:bit_cast with C++20
	unsigned int uv;
	std::memcpy(&uv, &v, sizeof(int));
	return uv;
	}

	/**
	* @brief Unsigned integer bit-interpreted as a signed integer.
	*
	* @param v The value to bitcast.
	*
	* @return The converted value.
	*/
	static inline int uint_as_int(unsigned int v)
	{
	// Future: Can use std:bit_cast with C++20
	int sv;
	std::memcpy(&sv, &v, sizeof(unsigned int));
	return sv;
	}

	/**
	* @brief Fast approximation of 1.0 / sqrt(val).
	*
	* @param v The input value.
	*
	* @return The approximated result.
	*/
	static inline float rsqrt(float v)
	{
	return 1.0f / std::sqrt(v);
	}

	/**
	* @brief Fast approximation of sqrt(val).
	*
	* @param v The input value.
	*
	* @return The approximated result.
	*/
	static inline float sqrt(float v)
	{
	return std::sqrt(v);
	}

	/**
	* @brief Extract mantissa and exponent of a float value.
	*
	* @param v The input value.
	* @param[out] expo The output exponent.
	*
	* @return The mantissa.
	*/
	static inline float frexp(float v, int* expo)
	{
	unsigned int iv = astc::float_as_uint(v);
	*expo = ((iv >> 23) & 0xFF) - 126;
	iv = (iv & 0x807fffff) \| 0x3f000000;
	return astc::uint_as_float(iv);
	}

	/**
	* @brief Compute the product of two sizes.
	*
	* This function is implemented to indicate if overflow has occurred, which may
	* occur when input values are not trusted. Implementation is obviously slower
	* than one that does not do this, so don't use for values we know cannot
	* overflow.
	*
	* Overflow signaling is sticky, so calling code can check at the end of a
	* sequence of multiplies.
	*
	* @param val_a The first value to multiply.
	* @param val_b The second value to multiply.
	* @param[in,out] overflow Did previous or this calculation overflow?
	*
	* @return The multiplication result, which may have overflowed.
	*/
	static inline size_t mul_safe(
	size_t val_a,
	size_t val_b,
	bool& overflow
	) {
	size_t result = val_a * val_b;
	overflow = overflow \|\| ((val_b != 0) && ((result / val_b) != val_a));
	return result;
	}

	/**
	* @brief Get the number of blocks along a single axis.
	*
	* This function is implemented so that intermediate values will not overflow,
	* which may occur when input values are not trusted. Implementation is
	* obviously slower than one that does not do this, so don't use for values
	* we know cannot overflow.
	*
	* @param dim_axis The axis dimension, in pixels.
	* @param dim_block The block dimension, in pixels.
	*
	* @return The number of blocks needed in this dimension.
	*/
	static inline size_t get_block_count_safe(
	size_t dim_axis,
	size_t dim_block
	) {
	// Compute number of whole blocks
	size_t blocks = dim_axis / dim_block;

	// Add in any residual partial block
	if (dim_axis != (dim_block * blocks))
	{
	blocks++;
	}

	return blocks;
	}

	/**
	* @brief Initialize the seed structure for a random number generator.
	*
	* Important note: For the purposes of ASTC we want sets of random numbers to
	* use the codec, but we want the same seed value across instances and threads
	* to ensure that image output is stable across compressor runs and across
	* platforms. Every PRNG created by this call will therefore return the same
	* sequence of values ...
	*
	* @param state The state structure to initialize.
	*/
	void rand_init(uint64_t state[2]);

	/**
	* @brief Return the next random number from the generator.
	*
	* This RNG is an implementation of the "xoroshiro-128+ 1.0" PRNG, based on the
	* public-domain implementation given by David Blackman & Sebastiano Vigna at
	* http://vigna.di.unimi.it/xorshift/xoroshiro128plus.c
	*
	* @param state The state structure to use/update.
	*/
	uint64_t rand(uint64_t state[2]);

	}

	/* ============================================================================
	Softfloat library with fp32 and fp16 conversion functionality.
	============================================================================ */
	#if (ASTCENC_F16C == 0) && (ASTCENC_NEON == 0)
	/* narrowing float->float conversions */
	uint16_t float_to_sf16(float val);
	float sf16_to_float(uint16_t val);
	#endif

	/*********************************
	Vector library
	*********************************/
	#include "astcenc_vecmathlib.h"

	/*********************************
	Declaration of line types
	*********************************/
	// parametric line, 2D: The line is given by line = a + b * t.

	struct line2
	{
	vfloat4 a;
	vfloat4 b;
	};

	// parametric line, 3D
	struct line3
	{
	vfloat4 a;
	vfloat4 b;
	};

	struct line4
	{
	vfloat4 a;
	vfloat4 b;
	};


	struct processed_line2
	{
	vfloat4 amod;
	vfloat4 bs;
	};

	struct processed_line3
	{
	vfloat4 amod;
	vfloat4 bs;
	};

	struct processed_line4
	{
	vfloat4 amod;
	vfloat4 bs;
	};

	#endif