# 2) Strings — Exercises

**Learning goals:** indexing/slicing, methods, immutability, join/split, normalization, searching, simple formatting.


### Warm-ups

1. **Middle char(s)**

   * `middle(s)` → If odd length return the middle char, else return the two middle chars.

   ```python
   def middle(s):
       ...
   assert middle("abc") == "b"
   assert middle("abba") == "bb"
   ```

In [None]:
def middle(s):
    """Return the middle character of ``s``, or the two middle ones.

    Odd-length input yields the single centre character; even-length
    input yields the two characters that straddle the centre.
    """
    half, is_odd = divmod(len(s), 2)
    if is_odd:
        return s[half]
    # Even length: take the pair on either side of the midpoint.
    return s[half - 1:half + 1]
assert middle("abc") == "b"
assert middle("abba") == "bb"


2. **Title-case safely**

   * `safe_title(s)` → title-case each word, but leave words that are entirely upper case (acronyms) unchanged (e.g., `"learn SQL now"` → `"Learn SQL Now"`; `"use GPU"` → `"Use GPU"`).

   ```python
   def safe_title(s):
       ...
   ```



In [None]:
def safe_title(s):
    """Title-case each word of ``s`` while leaving ALL-CAPS words untouched.

    A word is any maximal run of non-whitespace characters. Words for which
    ``str.isupper()`` is true (acronyms such as ``SQL``) are kept verbatim;
    every other word is passed through ``str.capitalize()``.

    The whitespace between and around words is preserved exactly as given,
    so only the letter case of the text changes.
    """
    import re  # local import keeps this cell runnable on its own

    def transform(match):
        word = match.group()
        return word if word.isupper() else word.capitalize()

    # Substituting over non-whitespace runs (rather than split + join)
    # keeps tabs, repeated spaces and leading/trailing blanks intact.
    return re.sub(r"\S+", transform, s)

assert safe_title("learn SQL now") == "Learn SQL Now"
assert safe_title("use GPU") == "Use GPU"

3. **Reverse words**

   * `reverse_words(s)` → reverse the order of the words, keeping each word's characters and the spacing between words unchanged.

   ```python
   def reverse_words(s):
       ...
   assert reverse_words("one two  three") == "three two  one"
   ```



In [None]:
def reverse_words(s):
    """Reverse the order of the words in ``s`` without touching whitespace.

    The string is cut into alternating runs of whitespace and
    non-whitespace. Whitespace runs stay where they are; the word runs are
    written back in reverse order into the word positions.
    """
    # Indices at which the text switches between whitespace and word.
    cuts = [0]
    cuts.extend(
        i for i in range(1, len(s)) if s[i].isspace() != s[i - 1].isspace()
    )
    cuts.append(len(s))
    tokens = [s[lo:hi] for lo, hi in zip(cuts, cuts[1:])]

    # Popping from the end of this list hands the words out last-to-first.
    pending = [tok for tok in tokens if not tok.isspace()]
    return "".join(tok if tok.isspace() else pending.pop() for tok in tokens)

assert reverse_words("one two  three") == "three two  one"



4. **Normalize spaces**

   * `squash_spaces(s)` → collapse multiple spaces/tabs to a single space; strip ends.

   ```python
   def squash_spaces(s):
       ...
   assert squash_spaces("  a\t b   c ") == "a b c"
   ```



In [None]:
def squash_spaces(s):
    """Collapse every whitespace run in ``s`` to one space and trim the ends."""
    # split() with no separator drops leading/trailing whitespace and
    # treats any run of whitespace as a single delimiter.
    words = s.split()
    return " ".join(words)
assert squash_spaces("  a\t b   c ") == "a b c"


5. **CSV line → list (no quotes)**

   * `split_csv(s)` → split by commas, trim whitespace around fields; ignore empty trailing field if line ends with comma.

   ```python
   def split_csv(s):
       ...
   assert split_csv(" a, b ,c,") == ["a","b","c"]
   ```



In [None]:
def split_csv(s):
    """Split a quote-free CSV line into whitespace-trimmed fields.

    An empty final field (such as the one produced by a trailing comma)
    is dropped; empty fields elsewhere are kept.
    """
    *leading, last = (cell.strip() for cell in s.split(","))
    if last == "":
        return leading
    return leading + [last]
assert split_csv(" a, b ,c,") == ["a","b","c"]


6. **Mask secrets**

   * `mask_email(s)` → keep first char of user and domain, mask the rest with `*`, keep TLD.

     * `"alice@example.com"` → `"a****@e******.com"`

   ```python
   def mask_email(s):
       ...
   ```



In [None]:
def mask_email(s):
    """Mask an e-mail address, keeping only leading characters and the TLD.

    The user part and the domain name each keep their first character and
    have the remainder replaced by ``*``. The text after the last dot of
    the domain is left readable. A domain without a dot is masked as a
    whole.

    Raises:
        ValueError: if ``s`` contains no ``@`` (the unpacking fails).
    """
    def keep_first(text):
        # Empty and one-character strings come back unchanged because the
        # repeat count is then zero or negative.
        return text[:1] + "*" * (len(text) - 1)

    user, domain = s.split("@", 1)
    name, dot, tld = domain.rpartition(".")
    if dot:
        hidden_domain = keep_first(name) + dot + tld
    else:
        # No dot found: rpartition leaves the whole domain in ``tld``.
        hidden_domain = keep_first(tld)
    return keep_first(user) + "@" + hidden_domain


assert mask_email("alice@example.com") == "a****@e******.com"

7. **Find all indexes**

   * `find_all(s, sub)` → list of start indices where `sub` occurs (including overlaps).

   ```python
   def find_all(s, sub):
       ...
   assert find_all("aaaa", "aa") == [0,1,2]
   ```



In [None]:
def find_all(s, sub):
    """Return every start index at which ``sub`` occurs in ``s``.

    Overlapping occurrences are included, e.g. ``"aa"`` is found at
    indices 0, 1 and 2 of ``"aaaa"``.
    """
    hits = []
    pos = s.find(sub)
    while pos != -1:
        hits.append(pos)
        # Resume one character later (not len(sub) later) so that
        # overlapping matches are still discovered.
        pos = s.find(sub, pos + 1)
    return hits
assert find_all("aaaa", "aa") == [0,1,2]


8. **Anagram check**

   * `is_anagram(a, b)` ignoring spaces, case, punctuation.

   ```python
   def is_anagram(a, b):
       ...
   assert is_anagram("Listen", "Silent")
   ```



In [None]:
def is_anagram(a, b):
    """Return True if ``a`` and ``b`` are anagrams of each other.

    Only alphanumeric characters are compared, so spaces and punctuation
    are ignored, and the comparison is case-insensitive.
    """
    def normalize(s):
        # Case-fold the whole string before splitting it into characters:
        # folding can expand one character into several (e.g. the German
        # sharp s becomes "ss"), which per-character lower() cannot do.
        return sorted(c for c in s.casefold() if c.isalnum())

    return normalize(a) == normalize(b)
assert is_anagram("Listen", "Silent")


9. **Format table row**

   * `fmt_row(values, widths)` → left-align strings to fixed widths, joined by `" | "`.

   ```python
   def fmt_row(values, widths):
       ...
   assert fmt_row(["a","bb"], [3,4]) == "a   | bb  "
   ```

In [None]:
def fmt_row(values, widths):
    """Format one table row with left-aligned, space-padded cells.

    Each value is converted with ``str()`` and padded on the right to the
    width at the same position; cells are then joined with ``" | "``.
    Values longer than their width are not truncated, and pairing stops at
    the shorter of the two sequences.
    """
    separator = " | "
    return separator.join(
        str(cell).ljust(width) for cell, width in zip(values, widths)
    )
assert fmt_row(["a", "bb"], [3,4]) == "a   | bb  "


10. **Slugify**

    * `slugify(title)` → lowercase, trim, replace runs of non-alnum with single `-`, remove leading/trailing `-`.

    ```python
    def slugify(title):
        ...
    assert slugify("Hello,  World!!") == "hello-world"
    ```



In [None]:
def slugify(title):
    """Turn ``title`` into a lowercase, dash-separated slug.

    Alphanumeric characters (as judged by ``str.isalnum``) are kept in
    lower case; every run of other characters between two kept characters
    becomes a single ``-``. The result never starts or ends with ``-``.
    """
    pieces = []
    gap_seen = False  # a non-alnum run has been seen since the last kept char
    for ch in title.lower():
        if not ch.isalnum():
            gap_seen = True
            continue
        # Emit the separator lazily, only once another alnum character
        # follows, so no leading or trailing dash is ever produced and no
        # clean-up pass is needed afterwards.
        if gap_seen and pieces:
            pieces.append("-")
        pieces.append(ch)
        gap_seen = False
    return "".join(pieces)
assert slugify("Hello,  World!!") == "hello-world"

#### using regex library `re`

In [None]:
import re


def slugify(title):
    """Turn ``title`` into a lowercase, dash-separated slug using ``re``.

    Every run of non-alphanumeric characters becomes a single ``-``, and
    leading/trailing dashes are removed. Non-ASCII letters and digits are
    kept, matching what ``str.isalnum`` treats as alphanumeric.
    """
    s = title.lower()
    # \W is "not a word character"; word characters are alphanumerics plus
    # the underscore, so adding "_" to the class leaves exactly the
    # non-alphanumeric characters.
    s = re.sub(r'[\W_]+', '-', s)
    return s.strip('-')  # remove leading/trailing '-'

assert slugify("Hello,  World!!") == "hello-world"