Skip to content

Commit

Permalink
WIP: adaptive scaling algorithm to convert ieee-754 to rational
Browse files Browse the repository at this point in the history
  • Loading branch information
Ravenwater committed Nov 12, 2024
1 parent a3f8228 commit 0f5dd89
Show file tree
Hide file tree
Showing 5 changed files with 167 additions and 19 deletions.
3 changes: 2 additions & 1 deletion include/universal/number/rational/attributes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ bool sign(const rational<nbits,bt>& v) {
// generate the maxneg through maxpos value range of a rational number system configuration
// the type of arithmetic, Modulo or Saturating, does not affect the range
template<unsigned nbits, typename bt>
std::string rational_range(const rational<nbits,bt>& r) {
std::string rational_range(const rational<nbits,bt>& v) {
rational<nbits,bt> r{ v };
std::stringstream s;
s << std::setw(45) << type_tag(r) << " : [ "
<< r.maxneg() << " ... "
Expand Down
17 changes: 17 additions & 0 deletions include/universal/number/rational/rational.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,21 @@
/// math functions
#include <universal/number/rational/mathlib.hpp>

///////////////////////////////////////////////////////////////////////////////////////
/// aliases for commonly used rational number configurations
namespace sw { namespace universal {

// Shorthand type names for fixed-size binary rational configurations.
// The numeric suffix of each alias is the nbits template argument;
// the second template argument is the bt type passed to rational<>.

using rb8   = rational<8, uint8_t>;      // 8-bit binary rational

using rb16  = rational<16, uint16_t>;    // 16-bit binary rational

using rb32  = rational<32, uint32_t>;    // 32-bit binary rational

using rb64  = rational<64, uint64_t>;    // 64-bit binary rational

// 128-bit binary rational; note that bt is uint32_t here,
// not a type as wide as nbits like the aliases above
using rb128 = rational<128, uint32_t>;

}} // namespace sw::universal

#endif
68 changes: 57 additions & 11 deletions include/universal/number/rational/rational_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -321,33 +321,80 @@ class rational {
uint64_t e{ 0 }, f{ 0 };
bool s{ false };
extractFields(rhs, s, e, f, bits);

if (e == 0) { // subnormal
}
else { // normal
uint64_t _a = f | ieee754_parameter<Real>::hmask;
uint64_t a = f | ieee754_parameter<Real>::hmask;
uint64_t b = ieee754_parameter<Real>::hmask;
int exponent = static_cast<int>(e - ieee754_parameter<Real>::bias);
uint64_t a{ 0 };
if (exponent > 0) {
a = _a * (1ull << exponent);
}
else {
a = _a / (1ull << -exponent);
}
std::cout << "exponent = " << exponent << '\n';
std::cout << "a = " << to_binary(a) << '\n';
std::cout << "b = " << to_binary(b) << '\n';
if (a == b) {
n = 1;
d = 1;
}
else {
// gcd
// do we need to round the value or can we just throw the lower bits away?
//
// find the msb and shift it to the msb of the numerator
int msb = find_msb(a);
if (msb > nbits) {
int shift = 1 + msb - nbits; // one extra slot as we are shifting into a 2's complement encoding
a >>= shift;
b >>= shift;
}
/*
// normalize the ratio
uint64_t r;
while (a % b > 0ull) {
r = a % b;
a = b;
b = r;
}
*/
std::cout << "a = " << to_binary(a) << '\n';
std::cout << "b = " << to_binary(b) << '\n';
// and finally scale the ratio
msb = find_msb(a);
uint64_t maxUpShift = (nbits - msb - 1);
if (exponent >= 0) {
uint64_t scale = static_cast<uint64_t>(exponent);
// find the new msb to direct how we need to scale while avoiding overflow
if (scale > maxUpShift) {
a <<= maxUpShift;
b >>= (scale - maxUpShift);
}
else {
a <<= scale;
}
}
else {
uint64_t scale = static_cast<uint64_t>(-exponent);
// find the new msb to direct how we need to scale while avoiding underflow
uint64_t maxDownShift = find_msb(b);
if (scale > maxDownShift) {
if (maxUpShift < (scale - maxDownShift)) {
// overflow, saturate to maxpos
std::cerr << "overflow: scale = " << scale << '\n';
n = 0; d = 0;
}
else {
a <<= maxUpShift;
b >>= maxDownShift;
}
}
else {
b >>= scale;
}

}
n = (s ? -static_cast<int64_t>(a) : static_cast<int64_t>(a));
d = 1;
d = b;
normalize();
std::cout << "n = " << to_binary(n) << '\n';
std::cout << "d = " << to_binary(d) << '\n';
}
}
return *this;
Expand Down Expand Up @@ -458,5 +505,4 @@ rational<nbits,bt> abs(const rational<nbits,bt>& v) {
}



}} // namespace sw::universal
35 changes: 32 additions & 3 deletions include/universal/utility/find_msb.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace sw { namespace universal {
/// <summary>
/// find most significant bit that is set
/// </summary>
/// <param name="x">value to</param>
/// <param name="x">value to scan</param>
/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
inline constexpr unsigned int find_msb(unsigned int x) {
// find the first non-zero bit
Expand All @@ -39,7 +39,7 @@ inline constexpr unsigned int find_msb(unsigned int x) {
/// <summary>
/// find most significant bit that is set
/// </summary>
/// <param name="x">value to</param>
/// <param name="x">value to scan</param>
/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
inline constexpr unsigned int find_msb(unsigned long x) {
// find the first non-zero bit
Expand All @@ -55,6 +55,11 @@ inline constexpr unsigned int find_msb(unsigned long x) {
return base + bval;
}

/// <summary>
/// find most significant bit that is set
/// </summary>
/// <param name="x">value to scan</param>
/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
inline constexpr unsigned int find_msb(unsigned long long x) {
// find the first non-zero bit
unsigned int base = 0;
Expand All @@ -72,7 +77,11 @@ inline constexpr unsigned int find_msb(unsigned long long x) {

//////////////////////////// SIGNED integer types ////////////////////////


/// <summary>
/// find most significant bit that is set
/// </summary>
/// <param name="x">value to scan</param>
/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
inline constexpr unsigned int find_msb(signed char x) {
// find the first non-zero bit
uint8_t tmp = uint8_t(x);
Expand All @@ -86,6 +95,11 @@ inline constexpr unsigned int find_msb(signed char x) {
return base + bval;
}

/// <summary>
/// find most significant bit that is set
/// </summary>
/// <param name="x">value to scan</param>
/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
inline constexpr unsigned int find_msb(short x) {
// find the first non-zero bit
uint16_t tmp = uint16_t(x);
Expand All @@ -100,6 +114,11 @@ inline constexpr unsigned int find_msb(short x) {
return base + bval;
}

/// <summary>
/// find most significant bit that is set
/// </summary>
/// <param name="x">value to scan</param>
/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
inline constexpr unsigned int find_msb(int x) {
// find the first non-zero bit
uint32_t tmp = uint32_t(x);
Expand All @@ -115,6 +134,11 @@ inline constexpr unsigned int find_msb(int x) {
return base + bval;
}

/// <summary>
/// find most significant bit that is set
/// </summary>
/// <param name="x">value to scan</param>
/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
inline constexpr unsigned int find_msb(long x) {
// find the first non-zero bit
uint32_t tmp = uint32_t(x);
Expand All @@ -131,6 +155,11 @@ inline constexpr unsigned int find_msb(long x) {
return base + bval;
}

/// <summary>
/// find most significant bit that is set
/// </summary>
/// <param name="x">value to scan</param>
/// <returns> position of MSB that is set. LSB is defined to be at position 1, so no bits set returns 0</returns>
inline constexpr unsigned int find_msb(long long x) {
// find the first non-zero bit
uint64_t tmp = uint64_t(x);
Expand Down
63 changes: 59 additions & 4 deletions static/rational/api/api.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,59 @@ try {
std::string test_suite = "rational<16,uint16_t> API tests";
int nrOfFailedTestCases = 0;

{
Conversion< rb16 >(1.0e4f);
Conversion< rb16 >(1.5e4f);
Conversion< rb16 >(1.75e4f);
Conversion< rb16 >(1.875e4f);
Conversion< rb16 >(1.9375e4f);
Conversion< rb16 >(3.2767e4f);
}

return 0;

{
/*
rational<8, uint8_t> : [ -128 ... -0.00787402 0 0.00787402 ... 127 ]
rational<16, uint16_t> : [ -32768 ... -3.05185e-05 0 3.05185e-05 ... 32767 ]
rational<32, uint32_t> : [ -2.14748e+09 ... -4.65661e-10 0 4.65661e-10 ... 2.14748e+09 ]
rational<64, uint64_t> : [ -9.22337e+18 ... -1.0842e-19 0 1.0842e-19 ... 9.22337e+18 ]
*/

float f{ 32767 };
f /= 10000;
std::cout << to_binary(f) << " : " << f << '\n';
rb16 r;
r = 1.9375;
r = f;
for (int i = 0; i < 4; ++i) {
std::cout << to_binary(r) << " : " << r << '\n';
r *= 10;
}
}

return 0;
{
rb16 r;
r.maxpos();
std::cout << std::setprecision(25);
std::cout << to_binary(float(r)) << " : " << float(r) << '\n';
std::cout << to_binary(double(r)) << " : " << double(r) << '\n';
// 0b0.10111110.00000000000000000000000 : 9.2233720368547758e+18
// 0b0.10000111110.0000000000000000000000000000000000000000000000000000 : 9.2233720368547758e+18
float f{ 9.223372036854775808e+18 };
std::cout << to_binary(f) << " : " << f << '\n';
double d{ 9.223372036854775808e+18 };
std::cout << to_binary(d) << " : " << d << '\n';

int64_t i64{ 9223372036854775807 };
std::cout << to_binary(i64) << " : " << i64 << '\n';

r = f;
std::cout << to_binary(r) << " : " << r << '\n';

}

// important behavioral traits
{
using TestType = rational<16,uint16_t>;
Expand All @@ -43,10 +96,11 @@ try {
// conversions
std::cout << "+--------- Conversions\n";
{
Conversion< rational<8, uint8_t> >(1.875f);
Conversion< rational<16, uint16_t> >(1.875f);
Conversion< rational<32, uint32_t> >(1.875f);
Conversion< rational<64, uint64_t> >(1.875f);
Conversion< rb8 >(-1.875f);
Conversion< rb16 >(1.875e1);
Conversion< rb32 >(-1.875e5f);
Conversion< rb64 >(1.875e10);
Conversion< rb128 >(1.875e20);
}

// default behavior
Expand All @@ -66,6 +120,7 @@ try {
// report on the dynamic range of some standard configurations
std::cout << "+--------- Dynamic ranges of standard rational<16,uint16_t> configurations --------+\n";
{
// default standard types: rb8, rb16, rb32, and rb64
ExtremeValues< rational<8, uint8_t> >();
ExtremeValues< rational<16, uint16_t> >();
ExtremeValues< rational<32, uint32_t> >();
Expand Down

0 comments on commit 0f5dd89

Please sign in to comment.