Skip to content

Commit 5ab7305

Browse files
committed
added using threads tutorial
1 parent 114adc7 commit 5ab7305

File tree

6 files changed

+122
-0
lines changed

6 files changed

+122
-0
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -78,5 +78,6 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
7878
- [How to Send Emails in Python using smtplib Module](https://www.thepythoncode.com/article/sending-emails-in-python-smtplib). ([code](general/email-sender))
7979
- [How to Handle Files in Python using OS Module](https://www.thepythoncode.com/article/file-handling-in-python-using-os-module). ([code](python-standard-library/handling-files))
8080
- [How to Generate Random Data in Python](https://www.thepythoncode.com/article/generate-random-data-in-python). ([code](python-standard-library/generating-random-data))
81+
- [How to Use Threads to Speed Up your IO Tasks in Python](https://www.thepythoncode.com/article/using-threads-in-python). ([code](python-standard-library/using-threads))
8182

8283
For any feedback, please consider opening pull requests.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# [How to Use Threads to Speed Up your IO Tasks in Python](https://www.thepythoncode.com/article/using-threads-in-python)
2+
To run this:
3+
- `pip3 install -r requirements.txt`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
import requests
from concurrent.futures import ThreadPoolExecutor
from time import perf_counter

# number of worker threads to spawn
n_threads = 5

# read 1024 bytes every time
buffer_size = 1024


def download(url):
    """Download the file at *url* into the current directory, in chunks.

    The local file name is taken from the last path component of the URL.
    """
    # stream=True defers the body so it can be read chunk by chunk instead
    # of loading the whole file into memory at once; using the response as
    # a context manager guarantees the connection is returned to the pool
    # even if the download fails midway (the original never closed it)
    with requests.get(url, stream=True) as response:
        # local file name = last path segment of the URL
        filename = url.split("/")[-1]
        with open(filename, "wb") as f:
            for data in response.iter_content(buffer_size):
                # write each chunk as it arrives
                f.write(data)
21+
22+
23+
if __name__ == "__main__":
    # five sample images from Pixabay, repeated so the pool has real work
    sample_images = [
        "https://cdn.pixabay.com/photo/2018/01/14/23/12/nature-3082832__340.jpg",
        "https://cdn.pixabay.com/photo/2013/10/02/23/03/dawn-190055__340.jpg",
        "https://cdn.pixabay.com/photo/2016/10/21/14/50/plouzane-1758197__340.jpg",
        "https://cdn.pixabay.com/photo/2016/11/29/05/45/astronomy-1867616__340.jpg",
        "https://cdn.pixabay.com/photo/2014/07/28/20/39/landscape-404072__340.jpg",
    ]
    urls = sample_images * 5

    start = perf_counter()
    # hand every URL to the pool; leaving the with-block waits for
    # all submitted downloads to finish
    with ThreadPoolExecutor(max_workers=n_threads) as pool:
        pool.map(download, urls)
    print(f"Time took: {perf_counter() - start:.2f}s")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import requests

from threading import Thread
from queue import Queue

# thread-safe queue holding the URLs still to be downloaded
q = Queue()
# number of worker threads to spawn
n_threads = 5

# read 1024 bytes every time
buffer_size = 1024


def download():
    """Worker loop: pull URLs off the shared queue and download each one.

    Runs forever, so it must be started as a daemon thread; the process
    exits once the main thread returns from ``q.join()``.
    """
    # no `global` needed: the queue is only read, never rebound
    while True:
        # block until a URL is available
        url = q.get()
        try:
            # stream=True defers the body so it can be read chunk by chunk;
            # the with-block releases the connection even if the request fails
            with requests.get(url, stream=True) as response:
                # local file name = last path segment of the URL
                filename = url.split("/")[-1]
                with open(filename, "wb") as f:
                    for data in response.iter_content(buffer_size):
                        # write each chunk as it arrives
                        f.write(data)
        finally:
            # always signal completion: without this, an exception during the
            # download would skip task_done() and leave q.join() in the main
            # thread blocked forever
            q.task_done()
29+
30+
31+
if __name__ == "__main__":
    # five sample images from Pixabay, repeated so each worker gets several jobs
    sample_images = [
        "https://cdn.pixabay.com/photo/2018/01/14/23/12/nature-3082832__340.jpg",
        "https://cdn.pixabay.com/photo/2013/10/02/23/03/dawn-190055__340.jpg",
        "https://cdn.pixabay.com/photo/2016/10/21/14/50/plouzane-1758197__340.jpg",
        "https://cdn.pixabay.com/photo/2016/11/29/05/45/astronomy-1867616__340.jpg",
        "https://cdn.pixabay.com/photo/2014/07/28/20/39/landscape-404072__340.jpg",
    ]
    urls = sample_images * 5

    # enqueue every URL for the workers to pick up
    for url in urls:
        q.put(url)

    # spawn the worker threads
    for _ in range(n_threads):
        worker = Thread(target=download)
        # daemon thread: it will not keep the process alive once the
        # main thread finishes
        worker.daemon = True
        worker.start()

    # block until every queued URL has been marked done
    q.join()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
requests
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
import requests
from time import perf_counter

# read 1024 bytes every time
buffer_size = 1024


def download(url):
    """Download the file at *url* into the current directory, in chunks.

    The local file name is taken from the last path component of the URL.
    """
    # stream=True defers the body so it can be read chunk by chunk instead
    # of loading the whole file into memory at once; using the response as
    # a context manager guarantees the connection is released even if the
    # download fails midway (the original never closed it)
    with requests.get(url, stream=True) as response:
        # local file name = last path segment of the URL
        filename = url.split("/")[-1]
        with open(filename, "wb") as f:
            for data in response.iter_content(buffer_size):
                # write each chunk as it arrives
                f.write(data)
16+
17+
18+
if __name__ == "__main__":
    # five sample images from Pixabay, repeated to make the timing meaningful
    sample_images = [
        "https://cdn.pixabay.com/photo/2018/01/14/23/12/nature-3082832__340.jpg",
        "https://cdn.pixabay.com/photo/2013/10/02/23/03/dawn-190055__340.jpg",
        "https://cdn.pixabay.com/photo/2016/10/21/14/50/plouzane-1758197__340.jpg",
        "https://cdn.pixabay.com/photo/2016/11/29/05/45/astronomy-1867616__340.jpg",
        "https://cdn.pixabay.com/photo/2014/07/28/20/39/landscape-404072__340.jpg",
    ]
    urls = sample_images * 5

    start = perf_counter()
    # baseline: download each file one after another, no concurrency
    for url in urls:
        download(url)
    print(f"Time took: {perf_counter() - start:.2f}s")

0 commit comments

Comments
 (0)