WIP: adaptive scaling algorithm to convert IEEE-754 to rational

stillwater-sc · Nov 12, 2024 · 0f5dd89 · 0f5dd89
1 parent a3f8228
commit 0f5dd89
Show file tree

Hide file tree

Showing 5 changed files with 167 additions and 19 deletions.
diff --git a/include/universal/number/rational/attributes.hpp b/include/universal/number/rational/attributes.hpp
@@ -25,7 +25,8 @@ bool sign(const rational<nbits,bt>& v) {
 // generate the maxneg through maxpos value range of a logarithmic number system configuration
 // the type of arithmetic, Modulo or Saturating, does not affect the range
 template<unsigned nbits, typename bt>
-std::string rational_range(const rational<nbits,bt>& r) {
+std::string rational_range(const rational<nbits,bt>& v) {
+	rational<nbits,bt> r{ v };
 	std::stringstream s;
 	s << std::setw(45) << type_tag(r) << " : [ "
 		<< r.maxneg() << " ... "

diff --git a/include/universal/number/rational/rational.hpp b/include/universal/number/rational/rational.hpp
@@ -59,4 +59,21 @@
 /// math functions
 #include <universal/number/rational/mathlib.hpp>
 
+///////////////////////////////////////////////////////////////////////////////////////
+/// aliases for standard rational number configurations
+namespace sw { namespace universal {
+
+	// rational binary of 8bits
+	using rb8 = rational<8, uint8_t>;
+	// rational binary of 16bits
+	using rb16 = rational<16, uint16_t>;
+	// rational binary of 32bits
+	using rb32 = rational<32, uint32_t>;
+	// rational binary of 64bits
+	using rb64 = rational<64, uint64_t>;
+	// rational binary of 128bits
+	using rb128 = rational<128, uint32_t>;
+
+}}
+
 #endif
diff --git a/include/universal/number/rational/rational_impl.hpp b/include/universal/number/rational/rational_impl.hpp
@@ -321,33 +321,80 @@ class rational {
 		uint64_t e{ 0 }, f{ 0 };
 		bool s{ false };
 		extractFields(rhs, s, e, f, bits);
+
 		if (e == 0) { // subnormal
 		}
 		else { // normal
-			uint64_t _a = f | ieee754_parameter<Real>::hmask;
+			uint64_t a = f | ieee754_parameter<Real>::hmask;
 			uint64_t b = ieee754_parameter<Real>::hmask;
 			int exponent = static_cast<int>(e - ieee754_parameter<Real>::bias);
-			uint64_t a{ 0 };
-			if (exponent > 0) {
-				a = _a * (1ull << exponent);
-			}
-			else {
-				a = _a / (1ull << -exponent);
-			}
+			std::cout << "exponent = " << exponent << '\n';
+			std::cout << "a        = " << to_binary(a) << '\n';
+			std::cout << "b        = " << to_binary(b) << '\n';
 			if (a == b) {
 				n = 1;
 				d = 1;
 			}
 			else {
-				// gcd
+				// do we need to round the value or can we just throw the lower bits away?
+				// 
+				// find the msb and shift it to the msb of the numerator
+				int msb = find_msb(a);
+				if (msb > nbits) {
+					int shift = 1 + msb - nbits; // one extra slot as we are shifting into a 2's complement encoding
+					a >>= shift;
+					b >>= shift;
+				}
+				/*
+				// normalize the ratio
 				uint64_t r;
 				while (a % b > 0ull) {
 					r = a % b;
 					a = b;
 					b = r;
 				}
+				*/
+				std::cout << "a        = " << to_binary(a) << '\n';
+				std::cout << "b        = " << to_binary(b) << '\n';
+				// and finally scale the ratio
+				msb = find_msb(a);
+				uint64_t maxUpShift = (nbits - msb - 1);
+				if (exponent >= 0) {
+					uint64_t scale = static_cast<uint64_t>(exponent);
+					// find the new msb to direct how we need to scale while avoiding overflow
+					if (scale > maxUpShift) {
+						a <<= maxUpShift;
+						b >>= (scale - maxUpShift);
+					}
+					else {
+						a <<= scale;
+					}
+				}
+				else {
+					uint64_t scale = static_cast<uint64_t>(-exponent);
+					// find the new msb to direct how we need to scale while avoiding underflow
+					uint64_t maxDownShift = find_msb(b);
+					if (scale > maxDownShift) {
+						if (maxUpShift < (scale - maxDownShift)) {
+							// overflow, saturate to maxpos
+							std::cerr << "overflow: scale = " << scale << '\n';
+							n = 0; d = 0;
+						}
+						else {
+							a <<= maxUpShift;
+							b >>= maxDownShift;
+						}
+					}
+					else {
+						b >>= scale;
+					}
+
+				}
 				n = (s ? -static_cast<int64_t>(a) : static_cast<int64_t>(a));
-				d = 1;
+				d = b;
+				normalize();
+				std::cout << "n        = " << to_binary(n) << '\n';
+				std::cout << "d        = " << to_binary(d) << '\n';
 			}
 		}
 		return *this;
@@ -458,5 +505,4 @@ rational<nbits,bt> abs(const rational<nbits,bt>& v) {
 }
 
 
-
 }}  // namespace sw::universal
diff --git a/include/universal/utility/find_msb.hpp b/include/universal/utility/find_msb.hpp
@@ -20,7 +20,7 @@ namespace sw { namespace universal {
 /// <summary>
 /// find most significant bit that is set
 /// </summary>
-/// <param name="x">value to</param>
+/// <param name="x">value to scan</param>
 /// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
 inline constexpr unsigned int find_msb(unsigned int x) {
 	// find the first non-zero bit
@@ -39,7 +39,7 @@ inline constexpr unsigned int find_msb(unsigned int x) {
 /// <summary>
 /// find most significant bit that is set
 /// </summary>
-/// <param name="x">value to</param>
+/// <param name="x">value to scan</param>
 /// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
 inline constexpr unsigned int find_msb(unsigned long x) {
 	// find the first non-zero bit
@@ -55,6 +55,11 @@ inline constexpr unsigned int find_msb(unsigned long x) {
 	return base + bval;
 }
 
+/// <summary>
+/// find most significant bit that is set
+/// </summary>
+/// <param name="x">value to scan</param>
+/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
 inline constexpr unsigned int find_msb(unsigned long long x) {
 	// find the first non-zero bit
 	unsigned int base = 0;
@@ -72,7 +77,11 @@ inline constexpr unsigned int find_msb(unsigned long long x) {
 
 //////////////////////////// SIGNED integer types ////////////////////////
 
-
+/// <summary>
+/// find most significant bit that is set
+/// </summary>
+/// <param name="x">value to scan</param>
+/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
 inline constexpr unsigned int find_msb(signed char x) {
 	// find the first non-zero bit
 	uint8_t tmp = uint8_t(x);
@@ -86,6 +95,11 @@ inline constexpr unsigned int find_msb(signed char x) {
 	return base + bval;
 }
 
+/// <summary>
+/// find most significant bit that is set
+/// </summary>
+/// <param name="x">value to scan</param>
+/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
 inline constexpr unsigned int find_msb(short x) {
 	// find the first non-zero bit
 	uint16_t tmp = uint16_t(x);
@@ -100,6 +114,11 @@ inline constexpr unsigned int find_msb(short x) {
 	return base + bval;
 }
 
+/// <summary>
+/// find most significant bit that is set
+/// </summary>
+/// <param name="x">value to scan</param>
+/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
 inline constexpr unsigned int find_msb(int x) {
 	// find the first non-zero bit
 	uint32_t tmp = uint32_t(x);
@@ -115,6 +134,11 @@ inline constexpr unsigned int find_msb(int x) {
 	return base + bval;
 }
 
+/// <summary>
+/// find most significant bit that is set
+/// </summary>
+/// <param name="x">value to scan</param>
+/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
 inline constexpr unsigned int find_msb(long x) {
 	// find the first non-zero bit
 	uint32_t tmp = uint32_t(x);
@@ -131,6 +155,11 @@ inline constexpr unsigned int find_msb(long x) {
 	return base + bval;
 }
 
+/// <summary>
+/// find most significant bit that is set
+/// </summary>
+/// <param name="x">value to scan</param>
+/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
 inline constexpr unsigned int find_msb(long long x) {
 	// find the first non-zero bit
 	uint64_t tmp = uint64_t(x);

diff --git a/static/rational/api/api.cpp b/static/rational/api/api.cpp
@@ -34,6 +34,59 @@ try {
 	std::string test_suite = "rational<16,uint16_t> API tests";
 	int nrOfFailedTestCases = 0;
 
+	{
+		Conversion< rb16 >(1.0e4f);
+		Conversion< rb16 >(1.5e4f);
+		Conversion< rb16 >(1.75e4f);
+		Conversion< rb16 >(1.875e4f);
+		Conversion< rb16 >(1.9375e4f);
+		Conversion< rb16 >(3.2767e4f);
+	}
+
+	return 0;
+
+	{
+		/*
+			rational<8, uint8_t>   : [ -128 ... -0.00787402 0 0.00787402 ... 127 ]
+			rational<16, uint16_t> : [ -32768 ... -3.05185e-05 0 3.05185e-05 ... 32767 ]
+			rational<32, uint32_t> : [ -2.14748e+09 ... -4.65661e-10 0 4.65661e-10 ... 2.14748e+09 ]
+			rational<64, uint64_t> : [ -9.22337e+18 ... -1.0842e-19 0 1.0842e-19 ... 9.22337e+18 ]
+		 */
+
+		float f{ 32767 };
+		f /= 10000;
+		std::cout << to_binary(f) << " : " << f << '\n';
+		rb16 r;
+		r = 1.9375;
+		r = f;
+		for (int i = 0; i < 4; ++i) {
+			std::cout << to_binary(r) << " : " << r << '\n';
+			r *= 10;
+		}
+	}
+
+		return 0;
+	{
+		rb16 r;
+		r.maxpos();
+		std::cout << std::setprecision(25);
+		std::cout << to_binary(float(r)) << " : " << float(r) << '\n';
+		std::cout << to_binary(double(r)) << " : " << double(r) << '\n';
+		// 0b0.10111110.00000000000000000000000 : 9.2233720368547758e+18
+		// 0b0.10000111110.0000000000000000000000000000000000000000000000000000 : 9.2233720368547758e+18
+		float f{ 9.223372036854775808e+18 };
+		std::cout << to_binary(f) << " : " << f << '\n';
+		double d{ 9.223372036854775808e+18 };
+		std::cout << to_binary(d) << " : " << d << '\n';
+
+		int64_t i64{ 9223372036854775807 };
+		std::cout << to_binary(i64) << " : " << i64 << '\n';
+
+		r = f;
+		std::cout << to_binary(r) << " : " << r << '\n';
+
+	}
+
 	// important behavioral traits
 	{
 		using TestType = rational<16,uint16_t>;
@@ -43,10 +96,11 @@ try {
 	// conversions
 	std::cout << "+---------    Conversions\n";
 	{
-		Conversion< rational<8, uint8_t> >(1.875f);
-		Conversion< rational<16, uint16_t> >(1.875f);
-		Conversion< rational<32, uint32_t> >(1.875f);
-		Conversion< rational<64, uint64_t> >(1.875f);
+		Conversion< rb8 >(-1.875f);
+		Conversion< rb16 >(1.875e1);
+		Conversion< rb32 >(-1.875e5f);
+		Conversion< rb64 >(1.875e10);
+		Conversion< rb128 >(1.875e20);
 	}
 
 	// default behavior
@@ -66,6 +120,7 @@ try {
 	// report on the dynamic range of some standard configurations
 	std::cout << "+---------    Dynamic ranges of standard rational<16,uint16_t> configurations   --------+\n";
 	{
+		// default standard types: rb8, rb16, rb32, and rb64
 		ExtremeValues< rational<8, uint8_t> >();
 		ExtremeValues< rational<16, uint16_t> >();
 		ExtremeValues< rational<32, uint32_t> >();