## Setup

In [1]:
%%capture
# Install dlt together with its DuckDB extra; the %%capture cell magic above
# keeps pip's installation log out of the notebook output.
!pip install 'dlt[duckdb]'

In [1]:
def square_root_generator(limit):
    """Lazily yield the square roots of 1, 2, 3, ... up to and including ``limit``.

    Args:
        limit: Largest number whose square root is produced. Nothing is
            yielded when ``limit`` is smaller than 1.

    Yields:
        float: ``k ** 0.5`` for each integer ``k`` from 1 while ``k <= limit``.
    """
    current = 1
    while current <= limit:
        root = current ** 0.5
        yield root
        current += 1

In [2]:
def people_1():
    """Yield five person records with IDs 1 through 5, all in "City_A".

    Each record is a dict with the keys "ID", "Name", "Age" and "City";
    the age is 25 plus the ID.
    """
    for person_id in range(1, 6):
        record = {
            "ID": person_id,
            "Name": f"Person_{person_id}",
            "Age": 25 + person_id,
            "City": "City_A",
        }
        yield record


def people_2():
    """Yield six person records with IDs 3 through 8, all in "City_B".

    Each record is a dict with the keys "ID", "Name", "Age", "City" and
    "Occupation"; the age is 30 plus the ID. IDs 3-5 overlap with the IDs
    produced by ``people_1``.
    """
    yield from (
        {
            "ID": person_id,
            "Name": f"Person_{person_id}",
            "Age": 30 + person_id,
            "City": "City_B",
            "Occupation": f"Job_{person_id}",
        }
        for person_id in range(3, 9)
    )

## Solutions

### Question 1

In [4]:
# Question 1: add up every value the generator yields for a limit of 5.
print(sum(square_root_generator(limit=5)))

8.382332347441762


### Question 2

In [5]:
# Question 2: exhaust the generator for a limit of 13 and keep only the
# final value it yields (the 13th one).
last_sqrt = None
for root in square_root_generator(limit=13):
    last_sqrt = root

print(last_sqrt)

3.605551275463989


### Question 3

In [6]:
import dlt
import duckdb

# Question 3: load both generators into the same table by appending.
# The first run uses 'replace' so re-running this cell starts from a fresh
# table; the second run appends, so the overlapping IDs 3-5 are stored twice.
pipeline = dlt.pipeline(pipeline_name='people_data', destination="duckdb", dataset_name="people")
pipeline.run(people_1, table_name="people", write_disposition='replace')
pipeline.run(people_2, table_name="people", write_disposition='append')

# Query the database file inside a context manager so the connection is
# closed as soon as the result has been shown instead of staying open for
# the lifetime of the kernel.
with duckdb.connect('people_data.duckdb') as conn:
    # Point unqualified table names at the pipeline's dataset (schema).
    conn.sql(f"SET SEARCH_PATH = '{pipeline.dataset_name}'")
    conn.sql("SELECT SUM(age) FROM people").show()

┌──────────┐
│ sum(age) │
│  int128  │
├──────────┤
│      353 │
└──────────┘



### Question 4

In [7]:
import dlt
import duckdb

# Question 4: load both generators into the same table, merging on "ID".
# The first run uses 'replace' to reset the table; the second run uses
# 'merge' with primary_key='ID', so records from people_2 take the place of
# the people_1 records that share an ID (3-5) rather than being duplicated.
pipeline = dlt.pipeline(pipeline_name='people_data', destination="duckdb", dataset_name="people")
pipeline.run(people_1, table_name="people", primary_key='ID', write_disposition='replace')
pipeline.run(people_2, table_name="people", primary_key='ID', write_disposition='merge')

# Query the database file inside a context manager so the connection is
# closed as soon as the result has been shown instead of staying open for
# the lifetime of the kernel.
with duckdb.connect('people_data.duckdb') as conn:
    # Point unqualified table names at the pipeline's dataset (schema).
    conn.sql(f"SET SEARCH_PATH = '{pipeline.dataset_name}'")
    conn.sql("SELECT SUM(age) FROM people").show()

┌──────────┐
│ sum(age) │
│  int128  │
├──────────┤
│      266 │
└──────────┘

