# Sync and async code in Python

### Task: you have 10^6 urls, how should you read from them the data?

In [1]:
# import requests

#  this module for async request
# import aiohttp

import asyncio
from time import strftime, time, sleep

In [2]:
# adjust those params
NUMBER_OF_URLS = 100
DELAY_TIME = 1 # sec

---

## Solution: sync way

In [3]:
def pseudo_delay_for_read(i):
    # print(f'{i}: {strftime("%X")} - start')
    sleep(DELAY_TIME)
    # print(f'{i}: {strftime("%X")} - start')
    return i

def get_data(i, acc):
    res = pseudo_delay_for_read(i)
    acc.append(res)

def main(N):
    acc = []
    print(f'Started getting data...')
    
    t0 = time()
    for i in range(N):
        get_data(i, acc)
    t1 = time()
    
    print(f'Done in {t1-t0}s: {len(acc)=}')

In [4]:
main(NUMBER_OF_URLS)

Started getting data...
Done in 100.3844940662384s: len(acc)=100


---

## Solution: async way

In [5]:
async def pseudo_delay_for_read(i):
    # print(f'{i}: {strftime("%X")} - start')
    await asyncio.sleep(DELAY_TIME)
    # print(f'{i}: {strftime("%X")} - start')
    return i

async def get_data(i, acc):
    res = await pseudo_delay_for_read(i)
    acc.append(res)
    
async def main(N):
    acc = []
    print(f'Started getting data...')
    
    tasks = [get_data(i+1, acc) for i in range(N)]
    t0 = time()
    await asyncio.gather(*tasks)
    t1 = time()
    
    print(f'Done in {t1-t0}s: {len(acc)=}')

In [6]:
loop = asyncio.get_event_loop()
loop.create_task(main(NUMBER_OF_URLS))

<Task pending name='Task-4' coro=<main() running at /var/folders/pt/x5r90jr13wx39vb5hf51w6900000gp/T/ipykernel_15487/1081672014.py:11>>

Started getting data...
Done in 1.0094258785247803s: len(acc)=100


---

## Solution: async way with batching

In [7]:
BATCH_SIZE = NUMBER_OF_URLS // 3

async def pseudo_delay_for_read(i):
    # print(f'{i}: {strftime("%X")} - start')
    await asyncio.sleep(5)
    # print(f'{i}: {strftime("%X")} - end')
    return i

async def get_data(i, acc, sem):
    async with sem:
        res = await pseudo_delay_for_read(i)
        acc.append(res)
    
async def main(N):
    acc = []
    print(f'Started getting data...')

    sem = asyncio.Semaphore(BATCH_SIZE)
    tasks = [get_data(i+1, acc, sem) for i in range(N)]
    t0 = time()
    await asyncio.gather(*tasks)
    t1 = time()

    print(f'Done in {t1-t0}s: {len(acc)=}')

In [8]:
loop = asyncio.get_event_loop()
loop.create_task(main(NUMBER_OF_URLS))

<Task pending name='Task-105' coro=<main() running at /var/folders/pt/x5r90jr13wx39vb5hf51w6900000gp/T/ipykernel_15487/1431959143.py:14>>

Started getting data...
Done in 20.011415004730225s: len(acc)=100
