# Multithreading

In [3]:
import time
# Blocking baseline: every pause holds up the whole cell before the next word is printed.
for pause_seconds, word in ((5, "Hello"), (2, "World")):
    time.sleep(pause_seconds)
    print(word)

Hello
World


In [15]:
def simple_interest(p: float, n: float, r: float):
    """Print the simple interest and the final amount for a principal.

    The three-second sleep stands in for slow work so the function can be
    used in the threading timing comparisons.

    Args:
        p: Principal amount (printed in INR).
        n: Number of periods the principal is held (presumably years).
        r: Interest rate per period, as a percentage.

    Returns:
        None. Both figures are printed with two decimal places.
    """
    print("Simple Interest function Started")
    time.sleep(3)

    interest = (p * n * r) / 100
    amount = p + interest

    print(f"Simple Interest : {interest:.2f} INR")
    print(f"Amount : {amount:.2f} INR")

In [7]:
# Plain blocking call: control comes back only after simple_interest has printed its results.
simple_interest(p = 50_000, n=3, r=7.1)

Simple Interest : 10650.00 INR
Amount : 60650.00 INR


In [16]:
import math
def hypotenuse(a: float, b: float):
    """Print the hypotenuse of a right triangle with legs ``a`` and ``b``.

    ``math.hypot`` is used rather than ``sqrt(a**2 + b**2)`` because squaring
    the legs first raises ``OverflowError`` for very large floats and
    underflows to zero for very small ones.

    The two-second sleep stands in for slow work so the function can be used
    in the threading timing comparisons.

    Args:
        a: Length of the first leg.
        b: Length of the second leg.

    Returns:
        None. The result is printed.
    """
    print("Hypotenuse function started")
    time.sleep(2)
    c = math.hypot(a, b)
    print(f"Hypotenuse of {a} and {b} is {c}")

In [17]:
%%time
# Sequential baseline: hypotenuse cannot start until simple_interest has returned.
simple_interest(p = 60_000, n = 5, r = 6.5)
hypotenuse(a = 3, b = 4)

Simple Interest function Started
Simple Interest : 19500.00 INR
Amount : 79500.00 INR
Hypotenuse function started
Hypotenuse of 3 and 4 is 5.0
CPU times: total: 15.6 ms
Wall time: 5 s


In [18]:
from threading import Thread

In [20]:
%%time
# Create the threads
th1 = Thread(target=simple_interest, args =(65000, 4, 7.1))
th2 = Thread(target=hypotenuse, args=(4, 5))

# Start all the threads
th1.start()
th2.start()

# Wait for all threads to finish
th1.join()
th2.join()

Simple Interest function Started
Hypotenuse function started
Hypotenuse of 4 and 5 is 6.4031242374328485
Simple Interest : 18460.00 INR
Amount : 83460.00 INR
CPU times: total: 31.2 ms
Wall time: 3.03 s


In [21]:
def square(num: int):
    """Print the square of ``num`` after a two-second pause.

    The pause stands in for slow work so the function can be used in the
    threading timing comparisons.

    Args:
        num: Number to square.

    Returns:
        None. The result is printed.
    """
    print("Square function started")
    time.sleep(2)
    squared = num**2
    print(f"Square of number {num} is {squared}")

In [23]:
%%time
# Sequential baseline for three tasks: each call starts only after the previous one returns.
simple_interest(p = 50_000, n=5, r=7.1)
hypotenuse(a = 12, b = 13)
square(num = 11)

Simple Interest function Started
Simple Interest : 17750.00 INR
Amount : 67750.00 INR
Hypotenuse function started
Hypotenuse of 12 and 13 is 17.69180601295413
Square function started
Square of number 11 is 121
CPU times: total: 0 ns
Wall time: 7.01 s


In [25]:
%%time
th1 = Thread(target = simple_interest, args = (50_000, 4, 7.1))
th2 = Thread(target = hypotenuse, args=(3, 4))
th3 = Thread(target = square, args = (12,))

# Start all threads
th1.start()
th2.start()
th3.start()

# Wait for all threads to complete
th1.join()
th2.join()
th3.join()

Simple Interest function Started
Hypotenuse function started
Square function started
Hypotenuse of 3 and 4 is 5.0Square of number 12 is 144

Simple Interest : 14200.00 INR
Amount : 64200.00 INR
CPU times: total: 15.6 ms
Wall time: 3.06 s


# Show the cubes for multiple numbers

In [28]:
def cube(num: int):
    """Print the cube of ``num`` after a one-second pause.

    The input number is included in the message (matching ``square``) so each
    line can be matched to its input even when several threads print in a
    different order than they were started.

    Args:
        num: Number to cube.

    Returns:
        None. The result is printed.
    """
    time.sleep(1)
    print(f"Cube of number {num} is {num**3}")

In [30]:
%%time
# Single threading
a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

for i in a:
    cube(i)

Cube of number is 1
Cube of number is 8
Cube of number is 27
Cube of number is 64
Cube of number is 125
Cube of number is 216
Cube of number is 343
Cube of number is 512
Cube of number is 729
Cube of number is 1000
Cube of number is 1331
CPU times: total: 31.2 ms
Wall time: 11 s


In [31]:
# Multithreading
def multiple_cubes(nums: list[int]):
    """Run ``cube`` for every number in ``nums``, one thread per number.

    Args:
        nums: Numbers to hand to ``cube``.

    Returns:
        None, once every started thread has been joined.
    """
    # One worker per input number
    workers = [Thread(target=cube, args=(value,)) for value in nums]

    # Start them all so their waits overlap
    for worker in workers:
        worker.start()

    # Do not return until every worker is done
    for worker in workers:
        worker.join()

In [33]:
%%time
# multiple_cubes starts one thread per list element, so seven threads run here.
multiple_cubes(nums = [1, 2, 3, 4, 5, 6, 7])

Cube of number is 1Cube of number is 27
Cube of number is 8
Cube of number is 64
Cube of number is 125

Cube of number is 343
Cube of number is 216
CPU times: total: 15.6 ms
Wall time: 1.01 s


# Multithreading for downloading multiple files at a time

In [35]:
# Sample CSV served from raw.githubusercontent.com; printed to confirm the full address.
url1 = "https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/data.csv"
print(url1)

https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/data.csv


In [36]:
# Break the URL into its "/"-separated pieces.
url1.split("/")

['https:',
 '',
 'raw.githubusercontent.com',
 'utkarshg1',
 'mlproject_regression',
 'main',
 'artifacts',
 'data.csv']

In [37]:
# The last piece is the bare file name.
url1.split("/")[-1]

'data.csv'

In [38]:
from urllib.request import urlretrieve

def download_file(url: str):
    filename = url.split("/")[-1]
    print(f"{filename} download started ...")
    urlretrieve(url, filename)
    print(f"{filename} download complete")

In [39]:
%%time
download_file(url)

data.csv download started ...
data.csv download complete
CPU times: total: 219 ms
Wall time: 1.44 s


In [40]:
# Three CSV files from the same repository folder (artifacts/).
urls = [
    "https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/data.csv",
    "https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/test.csv",
    "https://raw.githubusercontent.com/utkarshg1/mlproject_regression/main/artifacts/train.csv"
]

In [41]:
def download_single_threaded(urls: list[str]):
    """Download every URL in ``urls`` one after another.

    Args:
        urls: Addresses passed, in order, to ``download_file``.

    Returns:
        None, after the last download call has returned.
    """
    for link in urls:
        # Each call blocks until it returns, so downloads never overlap.
        download_file(link)

In [43]:
%%time
# Sequential baseline: the files are fetched one at a time.
download_single_threaded(urls)

data.csv download started ...
data.csv download complete
test.csv download started ...
test.csv download complete
train.csv download started ...
train.csv download complete
CPU times: total: 281 ms
Wall time: 1.82 s


In [47]:
def multithread_download(urls: list[str]):
    """Download every URL in ``urls`` at the same time, one thread per URL.

    Args:
        urls: Addresses, each handed to ``download_file`` in its own thread.

    Returns:
        None, once every started thread has been joined.
    """
    # One worker per address
    workers = [Thread(target=download_file, args=(link,)) for link in urls]

    # Start them all so the downloads overlap
    for worker in workers:
        worker.start()

    # Do not return until every download thread is done
    for worker in workers:
        worker.join()

In [48]:
%%time
# Threaded version: multithread_download starts one thread per URL and waits for all of them.
multithread_download(urls = urls)

data.csv download started ...test.csv download started ...

train.csv download started ...
data.csv download complete
test.csv download complete
train.csv download complete
CPU times: total: 375 ms
Wall time: 1.22 s


In [49]:
import pandas as pd
# Read data.csv from the working directory and preview the first rows.
df = pd.read_csv("data.csv")
df.head()

Unnamed: 0,id,carat,cut,color,clarity,depth,table,x,y,z,price
0,0,1.52,Premium,F,VS2,62.2,58.0,7.27,7.33,4.55,13619
1,1,2.03,Very Good,J,SI2,62.0,58.0,8.06,8.12,5.05,13387
2,2,0.7,Ideal,G,VS1,61.2,57.0,5.69,5.73,3.5,2772
3,3,0.32,Ideal,G,VS1,61.6,56.0,4.38,4.41,2.71,666
4,4,1.7,Premium,G,VS2,62.6,59.0,7.65,7.61,4.77,14453


In [52]:
# Summary of data.csv: row count, column dtypes and non-null counts.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 154858 entries, 0 to 154857
Data columns (total 11 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   id       154858 non-null  int64  
 1   carat    154858 non-null  float64
 2   cut      154858 non-null  object 
 3   color    154858 non-null  object 
 4   clarity  154858 non-null  object 
 5   depth    154858 non-null  float64
 6   table    154858 non-null  float64
 7   x        154858 non-null  float64
 8   y        154858 non-null  float64
 9   z        154858 non-null  float64
 10  price    154858 non-null  int64  
dtypes: float64(6), int64(2), object(3)
memory usage: 13.0+ MB


In [51]:
# Read train.csv from the working directory and preview the first rows.
b = pd.read_csv("train.csv")
b.head()

Unnamed: 0,id,carat,cut,color,clarity,depth,table,x,y,z,price
0,83475,0.32,Premium,E,SI1,61.6,58.0,4.38,4.41,2.71,614
1,160324,1.2,Premium,F,VS2,62.6,57.0,6.81,6.76,4.25,7002
2,101740,1.5,Ideal,I,VS2,62.2,55.0,7.3,7.26,4.53,9347
3,180341,1.67,Premium,I,SI2,61.9,59.0,7.65,7.61,4.71,8525
4,48480,1.0,Good,H,VS2,63.7,60.0,6.34,6.3,4.02,5217


In [54]:
# Summary of train.csv: row count, column dtypes and non-null counts.
b.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 154858 entries, 0 to 154857
Data columns (total 11 columns):
 #   Column   Non-Null Count   Dtype  
---  ------   --------------   -----  
 0   id       154858 non-null  int64  
 1   carat    154858 non-null  float64
 2   cut      154858 non-null  object 
 3   color    154858 non-null  object 
 4   clarity  154858 non-null  object 
 5   depth    154858 non-null  float64
 6   table    154858 non-null  float64
 7   x        154858 non-null  float64
 8   y        154858 non-null  float64
 9   z        154858 non-null  float64
 10  price    154858 non-null  int64  
dtypes: float64(6), int64(2), object(3)
memory usage: 13.0+ MB


In [53]:
# Read test.csv from the working directory and preview the first rows.
c = pd.read_csv("test.csv")
c.head()

Unnamed: 0,id,carat,cut,color,clarity,depth,table,x,y,z,price
0,14868,0.5,Ideal,D,SI1,62.1,57.0,5.05,5.08,3.14,1355
1,165613,2.0,Very Good,G,SI2,59.5,57.0,8.08,8.15,4.89,14691
2,96727,0.27,Premium,E,VVS1,60.5,59.0,4.19,4.16,2.52,844
3,145593,0.32,Premium,I,VVS1,61.2,59.0,4.43,4.44,2.71,707
4,118689,1.19,Ideal,H,SI1,62.5,56.0,6.77,6.81,4.23,5797


In [55]:
# Summary of test.csv: row count, column dtypes and non-null counts.
c.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38715 entries, 0 to 38714
Data columns (total 11 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   id       38715 non-null  int64  
 1   carat    38715 non-null  float64
 2   cut      38715 non-null  object 
 3   color    38715 non-null  object 
 4   clarity  38715 non-null  object 
 5   depth    38715 non-null  float64
 6   table    38715 non-null  float64
 7   x        38715 non-null  float64
 8   y        38715 non-null  float64
 9   z        38715 non-null  float64
 10  price    38715 non-null  int64  
dtypes: float64(6), int64(2), object(3)
memory usage: 3.2+ MB
