#### In this notebook, we will create a database in our MS SQL Server and populate it with synthetic data.

In [5]:
import pyodbc
import sys
import random
import pandas as pd
from faker import Faker


#### We've already created a database called `testDB1` on the MS SQL Server containing 3 empty tables: 

`CREATE TABLE Customers (
    CustomerID INT PRIMARY KEY,
    CustomerName VARCHAR(100),
    ContactName VARCHAR(100),
    Country VARCHAR(50)
);`

`CREATE TABLE Products (
    ProductID INT PRIMARY KEY,
    ProductName VARCHAR(100),
    SupplierID INT,
    CategoryID INT,
    UnitPrice DECIMAL(10, 2)
);`

`CREATE TABLE Orders (
    OrderID INT PRIMARY KEY,
    CustomerID INT,
    OrderDate DATETIME,
    TotalAmount DECIMAL(10, 2),
    FOREIGN KEY (CustomerID) REFERENCES Customers(CustomerID)
);`

In [6]:
# establish connection with the local SQL server
conn_str = (
    "Driver={ODBC Driver 17 for SQL Server};"
    "Server=172.16.6.196,1433;"  # Use the IP address and port directly
    "Database=testDB1;"  
    "UID=wsl;" 
    "PWD=greatpond501;"  
)

print(conn_str)

conn = pyodbc.connect(conn_str)
cursor = conn.cursor()

# test the connection
cursor.execute("SELECT @@version;")

Driver={ODBC Driver 17 for SQL Server};Server=172.16.6.196,1433;Database=testDB1;UID=wsl;PWD=greatpond501;


<pyodbc.Cursor at 0x7f4ee9ce8f30>

#### Now let's insert some fake data into our tables, we will use `pyodbc` to directly run SQL queries on the database. 

In [7]:
# faker object for generating random fake data
fake = Faker()

In [22]:
num_rows = [1000, 2000, 5000]

customer_ids = random.sample(range(1, num_rows[0] + 1), num_rows[0])
product_ids = random.sample(range(1, num_rows[1] + 1), num_rows[1])
order_ids = random.sample(range(1, num_rows[2] + 1), num_rows[2])


# Clear out the tables
cursor.execute("DELETE FROM Orders")
cursor.execute("DELETE FROM Products")
cursor.execute("DELETE FROM Customers")
conn.commit()

In [23]:
# insert data into Customers table
for customer_id in customer_ids:
    cursor.execute("INSERT INTO Customers (CustomerID, CustomerName, ContactName, Country) VALUES (?, ?, ?, ?)",
                   (customer_id, fake.company(), fake.name(), fake.country()))

# insert data into Products table
for product_id in product_ids:
    cursor.execute("INSERT INTO Products (ProductID, ProductName, SupplierID, CategoryID, UnitPrice) VALUES (?, ?, ?, ?, ?)",
                   (product_id, fake.word(),fake.random_int(min=1, max=100), fake.random_int(min=1, max=100), fake.random_number(digits=2)))

# insert data into Orders table
for order_id in order_ids:
    cursor.execute("INSERT INTO Orders (OrderID, CustomerID, OrderDate, TotalAmount) VALUES (?, ?, ?, ?)",
                   (order_id, random.choice(customer_ids), fake.date_time_this_decade(), fake.random_number(digits=2)))


cursor.commit()
conn.close()   
