In [2]:
pwd

'C:\\Users\\Baron\\Desktop\\EE_267_Repo\\EE_267\\%PATH_EE267%\\EE267_env\\EE278'

### HW2 IEEE 754 Floating point standard

In [13]:
import struct
import numpy as np

def float_to_fp32(value):
    """Converts a float value to IEEE 754 single precision (FP32)"""
    return struct.unpack('>I', struct.pack('>f', value))[0]

def float_to_fp16(value):
    """Converts a float value to IEEE 754 half precision (FP16)"""
    return np.float16(value).view(np.uint16)

def float_to_bf16(value):
    """Converts a float value to BF16 (Bfloat16)"""
    # Convert to FP32 first
    fp32 = float_to_fp32(value)
    # Extract top 16 bits
    bf16 = (fp32 >> 16) & 0xFFFF
    return bf16
    
def float_to_fixed_point(value, total_bits, fractional_bits):
    """Converts a float value to a fixed-point representation"""
    scale_factor = 2 ** fractional_bits
    fixed_point_value = int(value * scale_factor)
    max_value = 2 ** total_bits - 1
    if fixed_point_value > max_value:
        fixed_point_value = max_value
    return fixed_point_value

def to_binary_str(value, bit_length):
    """Converts a number to a binary string, with underscores between each nibble (4 bits)"""
    return '_'.join(f'{value:0{bit_length}b}'[i:i+4] for i in range(0, bit_length, 4))


# Value to convert
value = 3.4375

# FP32 (Single Precision)
fp32_bin = to_binary_str(float_to_fp32(value), 32)
print(f"FP32: {fp32_bin}")

# FP16 (Half Precision)
fp16_bin = to_binary_str(float_to_fp16(value), 16)
print(f"FP16: {fp16_bin}")

# BF16 (Bfloat16)
bf16_bin = to_binary_str(float_to_bf16(value), 16)
print(f"BF16: {bf16_bin}")

# 8-bit fixed-point (assuming 4 integer bits, 4 fractional bits)
fixed_8bit = float_to_fixed_point(value, 8, 4)
fixed_8bit_bin = to_binary_str(fixed_8bit, 8)
print(f"8-bit Fixed Point: {fixed_8bit_bin}")

# 5-bit fixed-point (assuming 2 integer bits, 3 fractional bits)
fixed_5bit = float_to_fixed_point(value, 5, 3)
fixed_5bit_bin = to_binary_str(fixed_5bit, 5)
print(f"5-bit Fixed Point: {fixed_5bit_bin}")

FP32: 0100_0000_0101_1100_0000_0000_0000_0000
FP16: 0100_0010_1110_0000
BF16: 0100_0000_0101_1100
8-bit Fixed Point: 0011_0111
5-bit Fixed Point: 1101_1


In [14]:
import numpy as np
import struct

# Function to group bits into blocks of 4 separated by underscores
def group_bits(bits, group_size=4):
    return '_'.join([bits[i:i+group_size] for i in range(0, len(bits), group_size)])

# Input value
value = 3.4375

# FP32 (Single Precision 32-bit)
fp32 = np.float32(value)
fp32_bits = np.binary_repr(np.frombuffer(np.float32(fp32).tobytes(), dtype=np.uint32)[0], width=32)
fp32_grouped = group_bits(fp32_bits)

# FP16 (Half Precision 16-bit)
fp16 = np.float16(value)
fp16_bits = np.binary_repr(np.frombuffer(np.float16(fp16).tobytes(), dtype=np.uint16)[0], width=16)
fp16_grouped = group_bits(fp16_bits)

# BF16 (Brain Floating Point 16-bit)
# BF16 is truncated from FP32, keeping the first 16 bits.
bf16_bits = fp32_bits[:16]
bf16_grouped = group_bits(bf16_bits)

# 8-bit fixed-point representation (with 4 integer bits and 4 fractional bits)
scale_8bit = 2**4
fixed8bit = int(value * scale_8bit)
fixed8bit_bits = np.binary_repr(fixed8bit & 0xFF, width=8)
fixed8bit_grouped = group_bits(fixed8bit_bits)

# 5-bit fixed-point representation (with 2 integer bits and 3 fractional bits)
scale_5bit = 2**3
fixed5bit = int(value * scale_5bit)
fixed5bit_bits = np.binary_repr(fixed5bit & 0x1F, width=5)
fixed5bit_grouped = group_bits(fixed5bit_bits, group_size=5)  # No need for 4-bit groups in 5-bit representation

# Output results
print(f"FP32 bits (grouped): {fp32_grouped}")
print(f"FP16 bits (grouped): {fp16_grouped}")
print(f"BF16 bits (grouped): {bf16_grouped}")
print(f"8-bit Fixed Point bits (grouped): {fixed8bit_grouped}")
print(f"5-bit Fixed Point bits (grouped): {fixed5bit_grouped}")

FP32 bits (grouped): 0100_0000_0101_1100_0000_0000_0000_0000
FP16 bits (grouped): 0100_0010_1110_0000
BF16 bits (grouped): 0100_0000_0101_1100
8-bit Fixed Point bits (grouped): 0011_0111
5-bit Fixed Point bits (grouped): 11011


In [16]:
import struct
import numpy as np

def float_to_bin(num, width):
    """ Convert a floating-point number to binary string representation. """
    return format(struct.unpack('!I', struct.pack('!f', num))[0], '032b')[-width:]

def fp32_to_binary(num):
    """ Convert number to FP32 binary string. """
    return float_to_bin(num, 32)

def fp16_to_binary(num):
    """ Convert number to FP16 binary string. """
    # Using numpy to handle half-precision conversion
    fp16 = np.float16(num)
    return float_to_bin(fp16, 16)

def bf16_to_binary(num):
    """ Convert number to BF16 binary string. """
    # Using numpy to handle bfloat16 conversion
    bf16 = np.float32(num).astype(np.float16).view(np.uint16)
    return format(bf16, '016b')[-16:]

def fixed_point_to_binary(num, integer_bits, fractional_bits):
    """ Convert number to fixed-point binary representation. """
    scaling_factor = 2 ** fractional_bits
    fixed_point_value = int(round(num * scaling_factor))
    
    # Ensure it fits within the bit width
    bit_width = integer_bits + fractional_bits
    fixed_point_value &= (1 << bit_width) - 1
    
    return format(fixed_point_value, f'0{bit_width}b')

def main():
    num = -4.375


    
    # FP32
    fp32_binary = fp32_to_binary(num)
    fp32_hex = hex(int(fp32_binary, 2))[2:].upper().zfill(8)
    
    # FP16
    fp16_binary = fp16_to_binary(num)
    fp16_hex = hex(int(fp16_binary, 2))[2:].upper().zfill(4)
    
    # BF16
    bf16_binary = bf16_to_binary(num)
    bf16_hex = hex(int(bf16_binary, 2))[2:].upper().zfill(4)
    
    # 8-bit Fixed Point (4 integer bits / 4 fractional bits)
    fixed8_binary = fixed_point_to_binary(num, 4, 4)
    fixed8_hex = hex(int(fixed8_binary, 2))[2:].upper().zfill(2)
    
    # 5-bit Fixed Point (2 integer bits / 3 fractional bits)
    fixed5_binary = fixed_point_to_binary(num, 2, 3)
    fixed5_hex = hex(int(fixed5_binary, 2))[2:].upper().zfill(1)
    
    # Print Results
    print("FP32 (Single Precision):")
    print(f"Binary: {fp32_binary}")
    print(f"Hexadecimal: {fp32_hex}")
    
    print("\nFP16 (Half Precision):")
    print(f"Binary: {fp16_binary}")
    print(f"Hexadecimal: {fp16_hex}")
    
    print("\nBF16 (Bfloat16):")
    print(f"Binary: {bf16_binary}")
    print(f"Hexadecimal: {bf16_hex}")
    
    print("\n8-bit Fixed Point:")
    print(f"Binary: {fixed8_binary}")
    print(f"Hexadecimal: {fixed8_hex}")
    
    print("\n5-bit Fixed Point:")
    print(f"Binary: {fixed5_binary}")
    print(f"Hexadecimal: {fixed5_hex}")

if __name__ == "__main__":
    main()

FP32 (Single Precision):
Binary: 11000000100011000000000000000000
Hexadecimal: C08C0000

FP16 (Half Precision):
Binary: 0000000000000000
Hexadecimal: 0000

BF16 (Bfloat16):
Binary: 1100010001100000
Hexadecimal: C460

8-bit Fixed Point:
Binary: 10111010
Hexadecimal: BA

5-bit Fixed Point:
Binary: 11101
Hexadecimal: 1D


In [4]:
import struct
import numpy as np

def float_to_fp32_binary(num):
    """ Convert a floating-point number to IEEE 754 FP32 binary string representation. """
    packed = struct.pack('!f', num)
    return ''.join(f'{byte:08b}' for byte in packed[::-1])

def main():
    num = 0.00000001
    fp32_binary = float_to_fp32_binary(num)
    fp32_hex = hex(int(fp32_binary, 2))[2:].upper().zfill(8)
    
    # Print Results
    print("FP32 (Single Precision):")
    print(f"Binary: {fp32_binary}")
    print(f"Hexadecimal: {fp32_hex}")

if __name__ == "__main__":
    main()

FP32 (Single Precision):
Binary: 01110111110011000010101100110010
Hexadecimal: 77CC2B32


In [3]:
import struct

def float_to_fp64(number):
    # Step 1: Convert float to binary using struct.pack (IEEE 754 format)
    packed = struct.pack('>d', number)  # '>d' for big-endian double-precision float
    # Step 2: Convert the packed binary data to a hexadecimal string
    hex_rep = ''.join(f'{byte:02x}' for byte in packed)
    # Step 3: Convert the hexadecimal string to binary representation
    bin_rep = bin(int(hex_rep, 16))[2:].zfill(64)  # Fill leading zeroes to make it 64 bits
    
    # Extract sign, exponent, and mantissa from the binary representation
    sign = bin_rep[0]
    exponent = bin_rep[1:12]
    mantissa = bin_rep[12:]
    
    # Helper function to insert underscores every 4 bits
    def format_with_underscores(bits):
        return '_'.join([bits[i:i+4] for i in range(0, len(bits), 4)])
    
    return {
        "sign_bit": sign,
        "exponent_bits": format_with_underscores(exponent),
        "mantissa_bits": format_with_underscores(mantissa),
        "binary_representation": format_with_underscores(bin_rep)
    }

# Test the function
number = .875
fp64_rep = float_to_fp64(number)
print(f"Decimal: {number}")
print(f"Sign bit: {fp64_rep['sign_bit']}")
print(f"Exponent bits: {fp64_rep['exponent_bits']}")
print(f"Mantissa bits: {fp64_rep['mantissa_bits']}")
print(f"Full IEEE 754 FP64 binary representation: {fp64_rep['binary_representation']}")


Decimal: 0.875
Sign bit: 0
Exponent bits: 0111_1111_110
Mantissa bits: 1100_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000
Full IEEE 754 FP64 binary representation: 0011_1111_1110_1100_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000_0000


In [8]:
import struct

def float_to_bfloat16(value):
    # Convert the float to 32-bit IEEE 754 binary representation
    bits = struct.unpack('>I', struct.pack('>f', value))[0]
    
    # Extract sign (1 bit)
    sign = (bits >> 31) & 0x1
    
    # Extract exponent (8 bits)
    exponent = (bits >> 23) & 0xFF
    
    # Extract mantissa (23 bits, but we will truncate it to 7 bits for bfloat16)
    mantissa = (bits >> 16) & 0x7F  # Keep the top 7 bits
    
    # Construct the bfloat16 representation (16 bits total)
    bfloat16_bits = (sign << 15) | (exponent << 7) | mantissa
    
    return bfloat16_bits

def bfloat16_to_float(bfloat16):
    # Extract the sign, exponent, and mantissa from the 16-bit bfloat16 value
    sign = (bfloat16 >> 15) & 0x1
    exponent = (bfloat16 >> 7) & 0xFF
    mantissa = bfloat16 & 0x7F
    
    # Convert back to a float32-like value
    sign_mult = -1 if sign == 1 else 1
    if exponent == 0:
        # Subnormal number
        return sign_mult * (mantissa / 2**7) * 2**(-126)
    elif exponent == 0xFF:
        # Infinity or NaN
        return float('inf') if mantissa == 0 else float('nan')
    else:
        # Normalized number
        return sign_mult * (1 + mantissa / 2**7) * 2**(exponent - 127)

# Example calculation using bfloat16
value1 = 1.125
value2 = -0.25

# Convert both values to bfloat16
bfloat16_1 = float_to_bfloat16(value1)
bfloat16_2 = float_to_bfloat16(value2)

# Perform the subtraction in float space and convert the result back to bfloat16
result_value = bfloat16_to_float(bfloat16_1) - bfloat16_to_float(bfloat16_2)
bfloat16_result = float_to_bfloat16(result_value)

# Convert the result to a readable binary string with underscores
binary_representation = '_'.join(f'{bfloat16_result:016b}'[i:i+4] for i in range(0, 16, 4))
print(f"Bfloat16 result: {binary_representation}")

Bfloat16 result: 0011_1111_1011_0000
