# Generate a synthetic dataset

In [None]:
import numpy as np
import os
import matplotlib.pyplot as plt

# Fix the global NumPy seed so the synthetic data is identical on every run.
SEED = 42
np.random.seed(SEED)

# 150 evenly spaced inputs on [-0.5, 0.5], stored as float64.
X = np.linspace(-0.5, 0.5, num=150, dtype=np.float64)

# Gaussian noise (mean 0, standard deviation 0.1), one draw per input point.
noise = np.random.normal(loc=0, scale=0.1, size=X.shape[0]).astype(np.float64)

# Ground-truth line y = 2x + 5, perturbed by the noise term.
y = 2 * X + 5 + noise

print('X values = ',X)
print('y values = ', y)
   


In [None]:
# Scatter plot of the raw synthetic samples, drawn on the current axes.
ax = plt.gca()
ax.scatter(X, y)
ax.set_xlabel('X values')
ax.set_ylabel('y values')

# Equation of a straight line 

<br>

<img src="line_equation.gif" width="200px" height="200px" align="left">   <br><br>

**y:** y values  
**x:** x values  
**b:** gradient  
**a:** y intercept  


# Calculating the gradient of the line of best fit
<br>

<img src="gradient.png" width="230px" height="230px" align="left">

In [None]:
# Least-squares slope: sum((x - mean_x) * (y - mean_y)) / sum((x - mean_x)^2).
# Centre each series once instead of recomputing the mean inside every expression.
x_centered = X - X.mean()
y_centered = y - y.mean()

# np.sum reduces the array in vectorised code; the builtin sum() would walk it
# element by element in Python.
numerator = np.sum(x_centered * y_centered)
denominator = np.sum(x_centered**2)

beta = numerator / denominator
print('The slope of regression line:', beta)

# Calculating the y intercept

<br>

<img src="intercept.png" width="150px" height="150px" align="left">

In [None]:
# The fitted line passes through (mean(X), mean(y)); solve y = a + b * x for a there.
x_bar = np.mean(X)
y_bar = np.mean(y)
intercept = y_bar - beta * x_bar
print('The y intercept of our regression line:', intercept)

In [None]:
# Make a new prediction with the fitted model: y = beta * x + intercept.
# The query point is named once so the formula and the message cannot drift apart.
x_new = 0.17
predicted_y_value = beta * x_new + intercept
print(f'Given x={x_new} , our model predicts the corresponding y value should be {predicted_y_value}')

In [None]:
# Overlay the fitted line and the single prediction on top of the raw samples.
fitted_line = beta * X + intercept

plt.scatter(X, y, label='Data Points')
plt.plot(X, fitted_line, color='red', label='Regression Line')
plt.scatter(0.17, predicted_y_value, color='green', label='pred for x = 0.17 ')

# Axis labels, title, legend and grid, then render the figure.
plt.xlabel('X')
plt.ylabel('y')
plt.title('Linear Regression')
plt.legend()
plt.grid(True)
plt.show()

# Report the fitted parameters underneath the plot.
for label, value in (("beta", beta), ("intercept", intercept)):
    print(f"Calculated {label}: {value}")


# Calculating the accuracy of our linear regression model




In [None]:
# In-sample predictions of the fitted line.
y_pred = beta * X + intercept

# Both metrics are built from the same squared residuals.
squared_residuals = (y - y_pred)**2
mse = np.mean(squared_residuals)

# R^2 = 1 - SS_res / SS_tot, with SS_tot measured around the mean of y.
y_mean = np.mean(y)
ss_res = np.sum(squared_residuals)
ss_tot = np.sum((y - y_mean)**2)
r_squared = 1 - ss_res / ss_tot

print("Mean Squared Error (MSE):", mse)
print("R-squared (R^2):", r_squared)



In [None]:
import json

# Read the model inputs from input.json (resolved against the current working directory).
# The context manager closes the file handle; the bare open() call used before never did.
with open("input.json", 'r') as input_file:
    data = json.load(input_file)

# Convert to a NumPy array. This rebinds X, replacing the synthetic inputs generated earlier.
X = np.array(data['input_data'])

### Generate the X values in Cairo and import the necessary libraries



In [None]:
# Names of the tensors to expose; each one becomes a `mod <name>;` line in generated.cairo.
tensor_name = ['X_values']

# Root of the Cairo sources, relative to the current working directory.
base_path = os.path.join("../../src")


def _to_fp16x16_args(val):
    """Return the ``(magnitude, sign)`` literal pair passed to ``FixedTrait::new``.

    The magnitude is ``abs(int(val * 2**16))`` (truncated toward zero). The sign is
    the lowercase string ``"true"`` for negative values and ``"false"`` otherwise.
    A value that truncates to magnitude 0 is always reported as non-negative so the
    generated file never contains a negative zero.
    """
    magnitude = abs(int(val * 2**16))
    is_negative = bool(val < 0) and magnitude != 0
    return magnitude, str(is_negative).lower()


def generate_cairo_files(data, name):
    """Write ``data`` as a Cairo function returning a ``Tensor<FP16x16>``.

    Creates ``<base_path>/generated/<name>.cairo`` containing a function called
    ``name`` that rebuilds the tensor (shape first, then the flattened values),
    and rewrites ``<base_path>/generated.cairo`` with one ``mod`` line per entry
    of the module-level ``tensor_name`` list.

    Args:
        data: array-like of numbers; converted with ``np.asarray``.
        name: name of the generated Cairo function and file (without extension).
    """
    data = np.asarray(data)
    generated_path = os.path.join(base_path, 'generated')
    os.makedirs(generated_path, exist_ok=True)

    # Build the whole file in memory, then write it in one go.
    lines = [
        "use array::ArrayTrait;\n",
        "use orion::operators::tensor::{FP16x16Tensor, TensorTrait, Tensor};\n",
        "use orion::numbers::{FixedTrait, FP16x16, FP16x16Impl};\n",
        "\nfn {0}() -> Tensor<FP16x16>  ".format(name) + "{\n",
        "    let mut shape = ArrayTrait::new();\n",
    ]
    lines.extend("    shape.append({0});\n".format(dim) for dim in data.shape)
    lines.append("    let mut data = ArrayTrait::new();\n")
    # Iterating the flattened array directly also copes with empty input,
    # which np.nditer rejects.
    for val in data.flatten():
        magnitude, sign = _to_fp16x16_args(val)
        lines.append("    data.append(FixedTrait::new({0}, {1} ));\n".format(magnitude, sign))
    lines.append("let tensor = TensorTrait::<FP16x16>::new(shape.span(), data.span()); \n \n")
    lines.append("return tensor;\n\n")
    lines.append("}\n")

    with open(os.path.join(generated_path, f"{name}.cairo"), "w") as f:
        f.writelines(lines)

    # The module list comes from tensor_name, not from `name`: a tensor that is
    # not listed there gets its file written but no `mod` declaration.
    with open(os.path.join(base_path, 'generated.cairo'), 'w') as f:
        for param_name in tensor_name:
            f.write(f"mod {param_name};\n")

In [None]:
# Emit the Cairo source for the input tensor X under the name X_values.
generate_cairo_files(data=X, name='X_values')

## Building our OLS functions in cairo using Orion lib

In [None]:
# Shell escape: make sure ../../src/lin_reg_func.cairo exists before the %%writefile cell below fills it in.
! touch ../../src/lin_reg_func.cairo

In [None]:
%%writefile ../../src/lin_reg_func.cairo

use orion::operators::tensor::{Tensor, TensorTrait, FP16x16Tensor};
use orion::numbers::{FP16x16, FixedTrait};

/// Predicts one y value per sample as `dot(betas, x_row) + intercept`.
///
/// `x_values.data` is read as a row-major matrix with `betas.data.len()` features
/// per sample, so sample `i` occupies indices `i * n_features .. (i + 1) * n_features`.
/// The returned tensor is one-dimensional with one entry per sample.
///
/// Panics if `betas` is empty or if the number of x elements is not a multiple
/// of the number of betas.
fn predict_y_values(
    betas: Tensor<FP16x16>, x_values: Tensor<FP16x16>, intercept: FP16x16
) -> Tensor<FP16x16> {
    let n_features = betas.data.len();
    assert(n_features != 0, 'betas must not be empty');
    assert(x_values.data.len() % n_features == 0, 'x len not multiple of betas');
    let n_samples = x_values.data.len() / n_features;

    // create a tensor to hold all the y_pred values (one per sample)
    let mut y_pred_shape = array::ArrayTrait::new();
    y_pred_shape.append(n_samples);

    let mut y_pred_vals = array::ArrayTrait::new();

    let mut i: u32 = 0;
    loop {
        if i >= n_samples {
            break ();
        }
        // Offset of sample i inside the flattened data. Indexing x by the feature
        // index alone would reuse the first sample for every prediction.
        let row_start = i * n_features;

        // Dot product of betas with the i-th sample, accumulated from (positive) zero.
        let mut predicted_value = FixedTrait::new(0, false);
        let mut j: u32 = 0;
        loop {
            if j >= n_features {
                break ();
            }
            predicted_value = *betas.data.at(j) * *x_values.data.at(row_start + j)
                + predicted_value;
            j += 1;
        };

        y_pred_vals.append(predicted_value + intercept);
        i += 1;
    };

    let y_pred_tensor = TensorTrait::<FP16x16>::new(y_pred_shape.span(), y_pred_vals.span());

    return y_pred_tensor;
}

## Running tests on our model

In [None]:
# Shell escape: make sure ../../src/test.cairo exists before the %%writefile cell below fills it in.
! touch ../../src/test.cairo

In [None]:
%%writefile ../../src/test.cairo
use debug::PrintTrait;

use giza::generated::X_values::X_values;
use giza::lin_reg_func::predict_y_values;
use orion::operators::tensor::{Tensor, TensorTrait, FP16x16Tensor};
use orion::numbers::{FP16x16, FixedTrait};


#[test]
#[available_gas(99999999999999999)]
fn linear_regression_test() {
    // Load the generated input tensor.
    let inputs = X_values();

    // Precomputed model parameters (same betas used for ezkl and cairo).
    // NOTE(review): FixedTrait::new receives a raw magnitude here, while the generator
    // scales values by 2^16 - confirm 1, 2 and 3 are meant as raw fixed-point values.
    let mut beta_values = ArrayTrait::new();
    beta_values.append(FixedTrait::new(1, false));
    beta_values.append(FixedTrait::new(2, false));
    let betas = TensorTrait::<FP16x16>::new(
        shape: array![1, 2].span(), data: beta_values.span()
    );
    let intercept = FixedTrait::new(3, false);

    // Smoke test only: the run must complete; the predictions are not inspected.
    let y_pred = predict_y_values(betas, inputs, intercept);
}


In [None]:
%%writefile ../../src/lib.cairo

mod generated;
mod lin_reg_func;
mod test;

### Run test

In [None]:
import json
import os
import subprocess
import time

# Time the Cairo test run end to end. The timer stops as soon as the command returns.
start_time = time.perf_counter()
result = subprocess.run(
    ["scarb", "cairo-test", "-f", "linear_regression_test"], capture_output=True, text=True
)
elapsed_seconds = time.perf_counter() - start_time

if result.returncode != 0:
    print("Error in subprocess:")
    print(result.stderr)
    # Abort the cell with a non-zero status without relying on the interactive `exit` helper.
    raise SystemExit(1)

# Stored representation of the timing: always a string with an "s" suffix, so the
# file has the same format whether it is created or updated.
proving_time = f"{elapsed_seconds}s"

# define the path that stores the benchmarking results
benchmark_path = os.path.join('../../benchmarks.json')

# Load the existing results if there are any; otherwise start from an empty skeleton.
# Both paths use the "linear_regressions" key that the update path has always read,
# so a file created here can be updated by the next run.
if os.path.isfile(benchmark_path):
    with open(benchmark_path, 'r') as f:
        benchmark = json.load(f)
else:
    benchmark = {
        "linear_regressions": {
            "orion": {},
            "riscZero": {}
        }
    }

# setdefault keeps any other entries already in the file and tolerates a missing section.
benchmark.setdefault('linear_regressions', {}).setdefault('orion', {})['provingTime'] = proving_time

# Write the updated benchmark back through a single handle that is closed on exit.
with open(benchmark_path, 'w') as f:
    json.dump(benchmark, f, indent=4)

# Print the result (optional)
print(f"Command executed in {elapsed_seconds} seconds")
