### Positional and Keyword Arguments

In [3]:
# a: positional-only
# b: positional-or-keyword
# c: keyword-only
def f(a, /, b, *, c):
    """Print the three arguments on one line, separated by spaces.

    The signature shows all three parameter kinds: ``a`` (before ``/``)
    is positional-only, ``b`` may be passed by position or by keyword,
    and ``c`` (after ``*``) is keyword-only.
    """
    values = (a, b, c)
    print(*values)

f(1, 2, c=3)
f(1, b=2, c=3)

1 2 3
1 2 3


In [4]:
f(1, b=2, c=3)

SyntaxError: positional argument follows keyword argument (1803835145.py, line 1)

In [5]:
# len, sum, min, max, all, any
# len([1, 2, 3])
# len((5, 6, 7))
len(" 123 ")

5

In [6]:
len({ 'a': 10, 'b': 20 })

2

In [17]:
# sorted, reversed
a = [20, 10, 30]
r = reversed(sorted(a))
r

<list_reverseiterator at 0x788738ffde70>

In [16]:
for i in r:
    print(i)

30
20
10


In [27]:
xs = ["a", "b", "c"]

for b in xs:
    print(b)

a
b
c


In [18]:
# enumerate
xs = ["a", "b", "c"]

for i, value in enumerate(xs):
    print(i, value)

0 a
1 b
2 c


In [28]:
# zip(a, b)
names = ['Aisha', 'Belal']
ages = [23, 10]

for a, b in zip(names, ages):
    print(b, '-->', a)

23 --> Aisha
10 --> Belal


In [37]:
xs = ['abc', 'def', 'egh', 'abc', 'def', 'egh', 'abc', 'def', 'egh', 'abc', 'def', 'egh']

for i in range(10, 1, -1):
    print(i, '-->', xs[i])

10 --> def
9 --> abc
8 --> egh
7 --> def
6 --> abc
5 --> egh
4 --> def
3 --> abc
2 --> egh


In [41]:
pairs = [("a", 2), ("b", 5), ("c", 1)]

sorted(pairs)

[('a', 2), ('b', 5), ('c', 1)]

In [45]:
pairs = [("a", 2), ("b", 5), ("c", 1)]

def by_count(pair):
    """Sort key: return the item at index 1 of *pair*.

    For the ``(name, count)`` tuples used in this cell that is the count.
    """
    count = pair[1]
    return count

print(sorted(pairs, key=by_count))

[('c', 1), ('a', 2), ('b', 5)]


In [51]:
# Comprehension =
    # 1. Expression `n.upper()`
    # 2. Iteration  `for n in names`
    # 3. Condition  `if not len(n) > 5`
names = [
    "Aisha",
    "Abdurrahman",
    "Ali"
]

xs = [n.upper() for n in names if not len(n) > 5]
xs

['AISHA', 'ALI']

In [58]:
def is_missing(value: str) -> bool:
    """Return True for empty / null-ish CSV values.

    The value is stripped of surrounding whitespace and case-folded, then
    compared against each known null marker in turn, so ``" NA "`` counts
    as missing.
    """
    nans = ["", "na", "n/a", "null", "none", "nan"]
    # First draft, kept as the "before" of the refactoring that follows:
    # the normalisation (strip + casefold) is recomputed on every pass
    # through the loop.
    for i in nans:
        if i == value.strip().casefold():
            return True
    return False

In [None]:
is_missing(" NA ")

In [None]:
# After: refactoring
def is_missing(value):
    """Return True when *value* is an empty or null-like CSV cell.

    Surrounding whitespace and letter case are ignored, so ``" NA "``
    and ``"null"`` both count as missing.
    """
    null_tokens = ("", "na", "n/a", "null", "none", "nan")
    # Normalise once, then let the `in` operator do the comparison.
    return value.strip().casefold() in null_tokens

In [None]:
is_missing(" NA ")

In [None]:
# After review:
# expert says: add case for when value is None.
NANS = ["", "na", "n/a", "null", "none", "nan"]

def is_missing(value):
    """Return True when *value* is None or an empty / null-like CSV cell.

    String values are compared against ``NANS`` after stripping
    surrounding whitespace and case-folding.
    """
    # The None test must come first: None has no .strip() to call.
    return value is None or value.strip().casefold() in NANS

In [59]:
is_missing(" NA ")

True

In [61]:
value = "abc"
float(value)

ValueError: could not convert string to float: 'abc'

In [65]:
value = "abc"

try:
    i = float(value)
except ValueError:
    i = None
print(i)

None


### Your strategy

1. **Build lines** (a list of strings)
2. `text = "\n".join(lines) + "\n"`
3. Write to a file

In [71]:
value = 5 / 3
print(f"{value:>8.2f}")

    1.67


In [78]:
print("\n".join(["1", "2", "3"]) + "\n")

1
2
3



In [73]:
n_rows = 6_852_000

lines = []
lines.append("# CSV Profiling Report")
lines.append("")
lines.append("## Summary")
lines.append(f"- Rows: {n_rows:,}")
text = "\n".join(lines) + "\n"

print(text)

# CSV Profiling Report

## Summary
- Rows: 6,852,000



| Column | Type   | Missing  | Unique |
|---     |---:    |---:      |---:    |
| age    | number | 0 (0.0%) | 12     |

In [83]:
file_path = 'data/sample.csv'
# Open through a context manager so the handle is closed as soon as the
# contents have been printed, and decode as UTF-8 explicitly instead of
# relying on the platform's default encoding.
with open(file_path, encoding="utf-8") as fp:
    print(fp.read())

a,b,c,d
1,2,3,4
5,6,7,8


In [84]:
from pathlib import Path

p = Path("data") / "sample.csv"
print(file_path)
print(p)

data/sample.csv
data/sample.csv


In [87]:
p.exists()

True

In [88]:
# open() accepts a Path object directly; the `with` block closes the
# handle once the contents have been printed, and the encoding is stated
# explicitly rather than taken from the platform default.
with open(Path("data") / "sample.csv", encoding="utf-8") as fp:
    print(fp.read())

a,b,c,d
1,2,3,4
5,6,7,8


In [93]:
print(p.parent.resolve())

/home/halgoz/bootcamp/w1d3/data


In [None]:
from csv import DictReader
from pathlib import Path

def read_csv_rows(path):
    """Read a CSV file with a header row into a list of dicts.

    Args:
        path: Location of the CSV file, as a ``str`` or ``Path``.

    Returns:
        One ``dict`` per data row, keyed by the names in the header row,
        with values as produced by ``csv.DictReader``. A file with no
        data rows yields ``[]``.
    """
    path = Path(path)
    # "utf-8-sig" reads plain UTF-8 unchanged but also drops a leading
    # byte-order mark (as written by Excel); with plain "utf-8" the BOM
    # would end up glued onto the first column name.
    # newline="" leaves line-ending handling to the csv module.
    with path.open("r", encoding="utf-8-sig", newline="") as f:
        return [dict(row) for row in DictReader(f)]

In [96]:
p = Path('data') / 'sample.csv'
rows = read_csv_rows(p)
rows

[{'a': '1', 'b': '2', 'c': '3', 'd': '4'},
 {'a': '5', 'b': '6', 'c': '7', 'd': '8'}]

In [100]:
columns = list(rows[0].keys())
columns

['a', 'b', 'c', 'd']

In [101]:
def get_columns(rows):
    """Return the column names, taken from the keys of the first row.

    An empty ``rows`` gives an empty list. Only the first row is
    inspected; later rows are not checked for extra or missing keys.
    """
    return list(rows[0].keys()) if rows else []

get_columns(rows)

['a', 'b', 'c', 'd']

In [None]:
import json
from pathlib import Path

def write_json(report: list | dict, path):
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    text = json.dumps(report, indent=2, ensure_ascii=False) + "\n"
    path.write_text(text, encoding="utf-8")

write_json(rows, 'output/out.json')

In [107]:
ys = [
    'Shahad',
    'Farah Alsuailem',
    'Shumookh',
    'Shurooq',
    'Deemah',
    'Yara',
    'Farah Alnakhli',
    'Amnah',
    'Nada'
]

xs = [
    'Abdullah',
    'Mohammad',
    'Abudllah Alsalem',
    'Aseel',
    'Faisal',
    'Abdurrahman',
]

In [126]:
import random

r = random.sample(xs, k=6)
pairs = []

for i in range(0, len(r), 2):
    print(i, i+1)
    pairs.append(r[i:i+1+1])
pairs

0 1
2 3
4 5


[['Aseel', 'Mohammad'],
 ['Abdurrahman', 'Abdullah'],
 ['Faisal', 'Abudllah Alsalem']]

In [130]:
import random

def make_pairs(population):
    """Shuffle *population* and split it into consecutive groups of two.

    Returns a list of lists. When the population has an odd number of
    members, the last group holds the single leftover member. The input
    sequence itself is not modified.
    """
    # Sampling every element without replacement gives a shuffled copy.
    shuffled = random.sample(population, k=len(population))
    return [shuffled[start:start + 2] for start in range(0, len(shuffled), 2)]

In [131]:
make_pairs(xs)

[['Aseel', 'Abdullah'],
 ['Abdurrahman', 'Mohammad'],
 ['Faisal', 'Abudllah Alsalem']]

In [132]:
make_pairs(ys)

[['Shahad', 'Farah Alnakhli'],
 ['Amnah', 'Deemah'],
 ['Shurooq', 'Shumookh'],
 ['Nada', 'Farah Alsuailem'],
 ['Yara']]