# Multithreading
Multithreading is used to reduce the total time taken to execute multiple tasks
by starting them simultaneously instead of one after another.

![image.png](attachment:image.png)

In [1]:
from warnings import filterwarnings
filterwarnings("ignore")

In [2]:
import time

In [4]:
%%time
time.sleep(2) # Pause for 2 seconds to simulate idle (waiting) time in the code
print("Hello!")

Hello!
CPU times: total: 15.6 ms
Wall time: 2.01 s


### Function to calculate simple interest

In [7]:
def simple_intrest(p, n, r):
    """Print the simple interest and the resulting total amount.

    The function sleeps for 2 seconds before calculating, which stands in
    for a slow, blocking task in the threading demonstrations.

    Args:
        p: Principal amount.
        n: Factor multiplied with ``p`` and ``r`` in the interest formula
            (the time period).
        r: Rate; the formula divides by 100, so it is given as a percentage.

    Returns:
        None. Interest and amount are printed with two decimals and an
        ``INR`` suffix, framed by a start and a completion message.
    """
    print("Simple Intrest function started")
    time.sleep(2)  # artificial delay

    interest = p * n * r / 100
    amount = p + interest

    # Emit each line with its own print call, one after the other.
    report = (
        f"Simple Intrest : {interest:.2f} INR",
        f"Amount : {amount:.2f} INR",
        "Simple Intrest function Completed",
    )
    for line in report:
        print(line)

In [8]:
%%time
simple_intrest(p=50000, n=4, r=6.5)

Simple Intrest function started
Simple Intrest : 13000.00 INR
Amount : 63000.00 INR
Simple Intrest function Completed
CPU times: total: 0 ns
Wall time: 2 s


In [13]:
def hypotenuse(a, b):
    """Print the hypotenuse of a right triangle with legs ``a`` and ``b``.

    The function sleeps for 3 seconds before calculating, which stands in
    for a slow, blocking task in the threading demonstrations.

    The length is computed with ``math.hypot`` instead of squaring the legs
    by hand, so very large float legs (e.g. ``1e200``) no longer raise
    ``OverflowError`` in the intermediate squares.

    Args:
        a: Length of the first leg (a real number).
        b: Length of the second leg (a real number).

    Returns:
        None. The result is printed with four decimals, framed by a start
        and a completion message.
    """
    import math  # local import so this cell does not depend on another cell's imports

    print("Hypotenuse function started")
    time.sleep(3)  # artificial delay
    h = math.hypot(a, b)
    print(f"Hypotenuse of Sides {a} and {b} is : {h:.4f}")
    print("Hypotenuse function completed")

In [14]:
%%time
hypotenuse(4, 5)

Hypotenuse function started
Hypotenuse of Sides 4 and 5 is : 6.4031
Hypotenuse function completed
CPU times: total: 0 ns
Wall time: 3 s


### Single Threaded example

In [15]:
%%time
simple_intrest(p=67000, n=5, r=7.1)
hypotenuse(5, 6)

Simple Intrest function started
Simple Intrest : 23785.00 INR
Amount : 90785.00 INR
Simple Intrest function Completed
Hypotenuse function started
Hypotenuse of Sides 5 and 6 is : 7.8102
Hypotenuse function completed
CPU times: total: 0 ns
Wall time: 5.06 s


### Multithreading

In [16]:
from threading import Thread

In [17]:
%%time
# Create one thread per task: `target` is the function to run, `args` its positional arguments
th1 = Thread(target=simple_intrest, args=(58000, 6, 7.1))
th2 = Thread(target=hypotenuse, args=(7, 8))

# Start both threads; neither start() call waits for its function to finish
th1.start()
th2.start()

# Block until both threads have finished, so the cell (and %%time) covers the full run
th1.join()
th2.join()

Simple Intrest function startedHypotenuse function started

Simple Intrest : 24708.00 INR
Amount : 82708.00 INR
Simple Intrest function Completed
Hypotenuse of Sides 7 and 8 is : 10.6301
Hypotenuse function completed
CPU times: total: 15.6 ms
Wall time: 3.04 s


In [18]:
# Fraction of wall time saved by threading: (sequential ~5 s - threaded ~3 s) / sequential ~5 s
(5-3)/5

0.4

### Single function applied to multiple values

In [20]:
def square(n):
    """Print the square of ``n`` after a 1-second pause.

    Args:
        n: The number to square.

    Returns:
        None. The message is printed followed by two extra newlines.
    """
    time.sleep(1)  # artificial delay
    squared = pow(n, 2)
    message = f"Square of number {n} is : {squared}\n\n"
    print(message)

In [23]:
%%time
square(5)

Square of number 5 is : 25


CPU times: total: 0 ns
Wall time: 1.01 s


In [22]:
# Single Threaded function
def square_list(a: list[int|float]):
    for i in a:
        square(i)

In [24]:
a = [2, 3, 4, 5, 6, 7]
len(a)

6

In [25]:
%%time
square_list(a)

Square of number 2 is : 4


Square of number 3 is : 9


Square of number 4 is : 16


Square of number 5 is : 25


Square of number 6 is : 36


Square of number 7 is : 49


CPU times: total: 0 ns
Wall time: 6.01 s


In [26]:
# Multithreading
def multi_squares(a: list[int, float]):
    # Create threads for each value and start
    threads = []
    for i in a:
        th = Thread(target=square, args=(i,))
        th.start()
        threads.append(th)

    # Wait for all threads to finish
    for th in threads:
        th.join()

In [27]:
a

[2, 3, 4, 5, 6, 7]

In [28]:
%%time
multi_squares(a)

Square of number 2 is : 4

Square of number 4 is : 16



Square of number 5 is : 25


Square of number 6 is : 36


Square of number 7 is : 49


Square of number 3 is : 9


CPU times: total: 0 ns
Wall time: 1.03 s


In [30]:
b = list(range(1, 101))
print(b)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]


In [34]:
%%time
multi_squares(b)

Square of number 1 is : 1


Square of number 2 is : 4


Square of number 3 is : 9


Square of number 4 is : 16


Square of number 5 is : 25


Square of number 6 is : 36


Square of number 9 is : 81


Square of number 8 is : 64


Square of number 7 is : 49


Square of number 10 is : 100


Square of number 12 is : 144


Square of number 11 is : 121


Square of number 15 is : 225


Square of number 14 is : 196


Square of number 13 is : 169


Square of number 16 is : 256


Square of number 17 is : 289


Square of number 19 is : 361


Square of number 20 is : 400


Square of number 18 is : 324


Square of number 21 is : 441


Square of number 23 is : 529


Square of number 22 is : 484


Square of number 24 is : 576


Square of number 25 is : 625


Square of number 26 is : 676


Square of number 27 is : 729


Square of number 28 is : 784


Square of number 29 is : 841


Square of number 30 is : 900


Square of number 31 is : 961


Square of number 32 is : 1024


Square of number 33 is : 108

### Downloading multiple files with multithreading

In [35]:
urls = [
    "https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/data.csv",
    "https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/test.csv",
    "https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/train.csv"
]

In [37]:
urls

['https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/data.csv',
 'https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/test.csv',
 'https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/train.csv']

In [38]:
urls[0]

'https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/data.csv'

In [39]:
urls[0].split("/")

['https:',
 '',
 'raw.githubusercontent.com',
 'utkarshg1',
 'mlproject_regression',
 'main',
 'artifacts',
 'data.csv']

In [40]:
urls[0].split("/")[-1]

'data.csv'

In [41]:
from urllib.request import urlretrieve

In [42]:
def download_file(url: str):
    """Download ``url`` to a file in the current working directory.

    The local file name is the last segment of the URL's *path*. The query
    string and fragment are ignored, so ``.../data.csv?raw=1`` is saved as
    ``data.csv`` rather than ``data.csv?raw=1``.

    Args:
        url: Address of the file to fetch.

    Returns:
        None. A start and a completion message are printed around the
        download.

    Raises:
        ValueError: If the URL path has no final segment to use as a file
            name (for example, it ends with ``/``).
    """
    from urllib.parse import urlsplit  # local import keeps this cell self-contained

    # Use only the path component so "?query" and "#fragment" never end up
    # in the file name.
    filename = urlsplit(url).path.rsplit("/", 1)[-1]
    if not filename:
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")

    print(f"File {filename} download started ...")
    urlretrieve(url, filename)
    print(f"File {filename} download complete")

In [43]:
# Single threading
def download_multiple_files(urls : list[str]):
    """Download every URL in ``urls`` one after another.

    Each ``download_file`` call returns before the next one starts.

    Args:
        urls: Addresses to download, processed in order.

    Returns:
        None.
    """
    for address in urls:
        download_file(address)

In [44]:
urls

['https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/data.csv',
 'https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/test.csv',
 'https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/train.csv']

In [45]:
%%time
download_multiple_files(urls)

File data.csv download started ...
File data.csv download complete
File test.csv download started ...
File test.csv download complete
File train.csv download started ...
File train.csv download complete
CPU times: total: 328 ms
Wall time: 4.26 s


In [46]:
# Multithreading
def download_multithread(urls: list[str]):
    """Download every URL in ``urls`` concurrently, one thread per URL.

    A ``Thread`` running ``download_file`` is started for each address; the
    function returns once all of them have finished.

    Args:
        urls: Addresses to download.

    Returns:
        None.
    """
    workers = []

    # Launch a thread for each address
    for address in urls:
        worker = Thread(target=download_file, args=(address,))
        worker.start()
        workers.append(worker)

    # Block until every download thread is done
    for worker in workers:
        worker.join()

In [47]:
urls

['https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/data.csv',
 'https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/test.csv',
 'https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/train.csv']

In [49]:
%%time
download_multithread(urls)

File data.csv download started ...File test.csv download started ...

File train.csv download started ...
File test.csv download complete
File train.csv download complete
File data.csv download complete
CPU times: total: 312 ms
Wall time: 1.3 s


In [50]:
# Fraction of download wall time saved by threading: (sequential 4.26 s - threaded 1.3 s) / sequential 4.26 s
(4.26 - 1.3)/4.26

0.6948356807511737

In [51]:
import pandas as pd
df = pd.read_csv("data.csv")
df.head()

Unnamed: 0,id,carat,cut,color,clarity,depth,table,x,y,z,price
0,0,1.52,Premium,F,VS2,62.2,58.0,7.27,7.33,4.55,13619
1,1,2.03,Very Good,J,SI2,62.0,58.0,8.06,8.12,5.05,13387
2,2,0.7,Ideal,G,VS1,61.2,57.0,5.69,5.73,3.5,2772
3,3,0.32,Ideal,G,VS1,61.6,56.0,4.38,4.41,2.71,666
4,4,1.7,Premium,G,VS2,62.6,59.0,7.65,7.61,4.77,14453


In [52]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 193573 entries, 0 to 193572
Data columns (total 11 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   id       193573 non-null  int64  
 1   carat    193573 non-null  float64
 2   cut      193573 non-null  object 
 3   color    193573 non-null  object 
 4   clarity  193573 non-null  object 
 5   depth    193573 non-null  float64
 6   table    193573 non-null  float64
 7   x        193573 non-null  float64
 8   y        193573 non-null  float64
 9   z        193573 non-null  float64
 10  price    193573 non-null  int64  
dtypes: float64(6), int64(2), object(3)
memory usage: 16.2+ MB


In [55]:
# Mean price per cut, highest first, rounded to 2 decimals
(
    df.groupby(by="cut")
    .agg({"price": "mean"})
    .sort_values(by="price", ascending=False)
    .round(2)
)

Unnamed: 0_level_0,price
cut,Unnamed: 1_level_1
Fair,4810.5
Premium,4797.06
Good,4191.47
Very Good,4165.3
Ideal,3396.19
