astcenc_clz_bug.cpp - external/github.com/ARM-software/astc-encoder - Git at Google

 #include <iostream>

 #define ASTCENC_SSE 20
 #include "astcenc_mathlib.h"

 /**
  * @brief Convert unorm16 [0, 65535] to float16 in range [0, 1].
  *
  * Reproducer variant that always calls clz() as a function: the manually
  * inlined alternative below is compiled out by the leading "0 &&" in its
  * preprocessor condition. main() compares the output of this function
  * against unorm16_to_sf16().
  *
  * @param p   The unorm16 input values.
  *
  * @return The converted values, returned as integer bit patterns in a vint4.
  */
 static ASTCENC_SIMD_INLINE vint4 unorm16_to_sf16_bugged(vint4 p)
 {
 	// FP16 bit pattern for 1.0; used as the result when the input is 0xFFFF
 	vint4 fp16_one = vint4(0x3C00);
 	// Result used when the input is below 4: the input shifted left by 8 bits
 	vint4 fp16_small = lsl<8>(p);

 	// Masks for the two special cases, computed from the unmodified input
 	vmask4 is_one = p == vint4(0xFFFF);
 	vmask4 is_small = p < vint4(4);

 	// Manually inline clz() on Visual Studio to avoid release build codegen bug
 	// NOTE: the "0 &&" forces this condition false, so the #else branch that
 	// calls clz() is always the one compiled in this reproducer
 #if 0 && !defined(__clang__) && defined(_MSC_VER)
 	vint4 a = (~lsr<8>(p)) & p;
 	a = float_as_int(int_to_float(a));
 	a = vint4(127 + 31) - lsr<23>(a);
 	vint4 lz = clamp(0, 32, a) - 16;
 #else
 	// NOTE(review): assumes clz() counts leading zeros over a 32-bit lane, so
 	// subtracting 16 gives the count relative to a 16-bit value - confirm
 	vint4 lz = clz(p) - 16;
 #endif
 	// The value of p is corrupted after calling clz()

 	// Scale p using (lz + 1) and keep only the low 16 bits of the product
 	// NOTE(review): presumably two_to_the_n(n) yields 2^n, making this a left
 	// shift that moves the leading set bit just above bit 15 - confirm
 	p = p * two_to_the_n(lz + 1);
 	p = p & vint4(0xFFFF);

 	// Drop the low 6 bits, leaving a 10-bit field
 	p = lsr<6>(p);

 	// Merge (14 - lz) into the bits from bit 10 upwards
 	p = p | lsl<10>(vint4(14) - lz);

 	// Apply the special cases on top of the general result
 	// NOTE(review): assumes select(a, b, mask) returns b where mask is set
 	vint4 r = select(p, fp16_one, is_one);
 	r = select(r, fp16_small, is_small);
 	return r;
 }


 int main()
 {
 	vint4 value(65519);

 	// This function inlines vint4 clz() as a workaround for the issue, which
 	// masks the problem and gives the correct result.
 	vint4 result_good = unorm16_to_sf16(value);

 	// This function uses the original code, calling clz() as a function,
 	// which corrupts the value of p in the caller in Release builds.
 	vint4 result_bad = unorm16_to_sf16_bugged(value);

 	print(result_good);
 	print(result_bad);

 	if (any(result_good != result_bad))
 	{
 		puts("Failed ...\n");
 		return 1;
 	}

 	puts("Success ...\n");
 	return 0;
 }
	#include <iostream>

	#define ASTCENC_SSE 20
	#include "astcenc_mathlib.h"

	/**
	 * @brief Convert unorm16 [0, 65535] to float16 in range [0, 1].
	 *
	 * Reproducer variant that always calls clz() as a function: the manually
	 * inlined alternative below is compiled out by the leading "0 &&" in its
	 * preprocessor condition. main() compares the output of this function
	 * against unorm16_to_sf16().
	 *
	 * @param p   The unorm16 input values.
	 *
	 * @return The converted values, returned as integer bit patterns in a vint4.
	 */
	static ASTCENC_SIMD_INLINE vint4 unorm16_to_sf16_bugged(vint4 p)
	{
		// FP16 bit pattern for 1.0; used as the result when the input is 0xFFFF
		vint4 fp16_one = vint4(0x3C00);
		// Result used when the input is below 4: the input shifted left by 8 bits
		vint4 fp16_small = lsl<8>(p);

		// Masks for the two special cases, computed from the unmodified input
		vmask4 is_one = p == vint4(0xFFFF);
		vmask4 is_small = p < vint4(4);

		// Manually inline clz() on Visual Studio to avoid release build codegen bug
		// NOTE: the "0 &&" forces this condition false, so the #else branch that
		// calls clz() is always the one compiled in this reproducer
	#if 0 && !defined(__clang__) && defined(_MSC_VER)
		vint4 a = (~lsr<8>(p)) & p;
		a = float_as_int(int_to_float(a));
		a = vint4(127 + 31) - lsr<23>(a);
		vint4 lz = clamp(0, 32, a) - 16;
	#else
		// NOTE(review): assumes clz() counts leading zeros over a 32-bit lane, so
		// subtracting 16 gives the count relative to a 16-bit value - confirm
		vint4 lz = clz(p) - 16;
	#endif
		// The value of p is corrupted after calling clz()

		// Scale p using (lz + 1) and keep only the low 16 bits of the product
		// NOTE(review): presumably two_to_the_n(n) yields 2^n, making this a left
		// shift that moves the leading set bit just above bit 15 - confirm
		p = p * two_to_the_n(lz + 1);
		p = p & vint4(0xFFFF);

		// Drop the low 6 bits, leaving a 10-bit field
		p = lsr<6>(p);

		// Merge (14 - lz) into the bits from bit 10 upwards
		p = p | lsl<10>(vint4(14) - lz);

		// Apply the special cases on top of the general result
		// NOTE(review): assumes select(a, b, mask) returns b where mask is set
		vint4 r = select(p, fp16_one, is_one);
		r = select(r, fp16_small, is_small);
		return r;
	}


	int main()
	{
	vint4 value(65519);

	// This function inlines vint4 clz() as a workaround for the issue, which
	// masks the problem and gives the correct result.
	vint4 result_good = unorm16_to_sf16(value);

	// This function uses the original code, calling clz() as a function,
	// which corrupts the value of p in the caller in Release builds.
	vint4 result_bad = unorm16_to_sf16_bugged(value);

	print(result_good);
	print(result_bad);

	if (any(result_good != result_bad))
	{
	puts("Failed ...\n");
	return 1;
	}

	puts("Success ...\n");
	return 0;
	}