## Searching in Files

In [1]:
# Example: write comma-separated "name,mark" records to a file.
# The data/ directory is created first so the cell also runs on a fresh
# checkout; an existing marks.txt is left untouched rather than overwritten.
import os

marks_data = "Alice,85\nBob,92\nCharlie,78\nDiana,95\nEve,88\nFrank,72\nGrace,91\nHenry,83\n"

# open(..., 'w') does not create missing parent directories, so make sure
# the target directory exists before writing into it.
os.makedirs('data', exist_ok=True)

if not os.path.exists('data/marks.txt'):
    with open('data/marks.txt', 'w') as f:
        f.write(marks_data)
else:
    print('marks.txt already exists')

In [2]:
# Find lines containing a word
import os

search_word = 'Python'

# Ensure the demo file exists. The data/ directory is created first because
# open(..., 'w') fails with FileNotFoundError when the parent directory is missing.
sample_content = """Python is great.\nI love Python.\nJava is also popular.\n"""
os.makedirs('data', exist_ok=True)
if not os.path.exists('data/search_demo.txt'):
    with open('data/search_demo.txt', 'w') as f:
        f.write(sample_content)

# Case-sensitive substring test on every line; strip() removes the trailing
# newline so print() does not emit a blank line after each match.
with open('data/search_demo.txt') as f:
    for line in f:
        if search_word in line:
            print(line.strip())

Python is great.
I love Python.


In [3]:
# Case-insensitive search: both the search term and each line are lower-cased
# before the substring test, so 'java' also matches 'Java'.
search_word = 'java'
needle = search_word.lower()

with open('data/search_demo.txt') as demo_file:
    matches = [row.strip() for row in demo_file if needle in row.lower()]

for match in matches:
    print(match)

Java is also popular.


In [4]:
# Count occurrences of a word.
# str.count tallies non-overlapping substring matches, so a line that contains
# the word twice contributes 2 to the total.
search_word = 'Python'

with open('data/search_demo.txt') as demo_file:
    count = sum(row.count(search_word) for row in demo_file)

print(f"'{search_word}' appears {count} times")

'Python' appears 2 times


## Parsing Structured Data

Process files with delimited data (CSV-like).

In [5]:
# Create sample marks file: one student per line, a name followed by
# space-separated marks. Mode 'w' replaces any existing data/marks.txt.
mark_rows = [
    "Alice 25 30 35",
    "Bob 20 25 30",
    "Charlie 30 28 32",
    "Diana 15 10 12",
]
marks_data = "\n".join(mark_rows)  # no trailing newline after the last row
with open('data/marks.txt', 'w') as marks_file:
    marks_file.write(marks_data)

In [6]:
# Process marks: add up each student's marks and report pass/fail
PASSING_MARKS = 40

with open('data/marks.txt') as marks_file:
    for record in marks_file:
        # First whitespace-separated field is the name, the rest are marks.
        fields = record.split()
        name = fields[0]
        marks = list(map(int, fields[1:]))
        total = sum(marks)

        if total >= PASSING_MARKS:
            status = 'PASSED'
        else:
            status = 'FAILED'
        print(f"{name}: {total} marks - {status}")

Alice: 90 marks - PASSED
Bob: 75 marks - PASSED
Charlie: 90 marks - PASSED
Diana: 37 marks - FAILED


In [7]:
# Parse CSV-like data with comma separator
csv_rows = [
    "name,age,city",
    "Alice,25,New York",
    "Bob,30,Los Angeles",
    "Charlie,35,Chicago",
]
csv_data = "\n".join(csv_rows)
with open('data/people.csv', 'w') as out_file:
    out_file.write(csv_data)

# Read and parse: the first line supplies the column names, and every
# following line is paired with them to build one dict per person.
with open('data/people.csv') as in_file:
    header = in_file.readline().strip().split(',')
    print(f"Columns: {header}")
    print()

    for record in in_file:
        fields = record.strip().split(',')
        person = {column: field for column, field in zip(header, fields)}
        print(f"{person['name']} is {person['age']} from {person['city']}")

Columns: ['name', 'age', 'city']

Alice is 25 from New York
Bob is 30 from Los Angeles
Charlie is 35 from Chicago


## Finding Patterns

In [8]:
# Create wordlist: one word per line, with 'cat' and 'dog' deliberately repeated
word_entries = ["cat", "dog", "elephant", "ant", "hippopotamus", "bee", "cat", "dog"]
words = "\n".join(word_entries)
with open('data/wordlist.txt', 'w') as wordlist_file:
    wordlist_file.write(words)

In [9]:
# Analyze wordlist
# Read every word once, then derive each statistic from the in-memory list.
with open('data/wordlist.txt') as wordlist_file:
    all_words = [entry.strip() for entry in wordlist_file]

# Three letter words (repeats are kept, in file order)
three_letter_words = [candidate for candidate in all_words if len(candidate) == 3]

# Longest word; on a tie the earliest word wins, and '' is used when there are no words
longest_word = max(all_words, key=len, default='')

# Duplicates: a word is recorded once, the first time it is seen again
unique_words = []
duplicates = []
for candidate in all_words:
    if candidate not in unique_words:
        unique_words.append(candidate)
    elif candidate not in duplicates:
        duplicates.append(candidate)

print(f"3-letter words: {three_letter_words}")
print(f"Longest word: {longest_word}")
print(f"Duplicates: {duplicates}")

3-letter words: ['cat', 'dog', 'ant', 'bee', 'cat', 'dog']
Longest word: hippopotamus
Duplicates: ['cat', 'dog']


## Processing Log Files

In [10]:
# Create sample user log: one "name,start,end" record per line, times as HH:MM
sessions = [
    ("Alice", "09:00", "10:30"),
    ("Bob", "13:00", "13:45"),
    ("Charlie", "14:00", "16:30"),
    ("Diana", "11:00", "11:15"),
]
log_data = "\n".join(",".join(session) for session in sessions)
with open('data/users.txt', 'w') as log_file:
    log_file.write(log_data)

In [11]:
# Find users online for at least 1 hour
MINUTES_PER_DAY = 24 * 60
MIN_ONLINE_MINUTES = 60


def _to_minutes(clock):
    """Convert an 'HH:MM' string to the number of minutes since midnight."""
    hours, minutes = map(int, clock.split(':'))
    return hours * 60 + minutes


with open('data/users.txt') as f:
    for line in f:
        record = line.strip()
        if not record:
            continue  # tolerate blank lines (e.g. a trailing newline in the log)

        parts = record.split(',')
        name = parts[0]

        # Calculate duration in minutes. The log holds no dates, so an end time
        # earlier than the start time is assumed to mean the session ran past
        # midnight; the modulo wraps it instead of producing a negative duration.
        duration = (_to_minutes(parts[2]) - _to_minutes(parts[1])) % MINUTES_PER_DAY

        if duration >= MIN_ONLINE_MINUTES:
            print(f"{name}: online for {duration} minutes")

Alice: online for 90 minutes
Charlie: online for 150 minutes


## Common Patterns

In [12]:
# Filter and write to new file: copy only the lines that contain 'Python'
with open('data/search_demo.txt') as fin, open('data/python_lines.txt', 'w') as fout:
    # Lines keep their original newline, so they are written back unchanged.
    fout.writelines(row for row in fin if 'Python' in row)

print("Filtered lines written to data/python_lines.txt")

Filtered lines written to data/python_lines.txt


In [13]:
# Line numbers with content (numbering starts at 1)
with open('data/search_demo.txt') as demo_file:
    for number, text in enumerate(map(str.strip, demo_file), start=1):
        print(f"Line {number}: {text}")

Line 1: Python is great.
Line 2: I love Python.
Line 3: Java is also popular.


In [14]:
# Skip header line, then show the remaining data rows
with open('data/people.csv') as csv_file:
    next(csv_file)  # consume the header so only data rows remain
    data_rows = [row.strip() for row in csv_file]

for data_row in data_rows:
    print(data_row)

Alice,25,New York
Bob,30,Los Angeles
Charlie,35,Chicago


## Quick Reference

| Task | Code |
|------|------|
| Search in line | `if word in line:` |
| Split by space | `line.split()` |
| Split by comma | `line.split(',')` |
| Remove whitespace | `line.strip()` |
| Count occurrences | `line.count(word)` |
| Line number | `enumerate(f, 1)` |
| Skip line | `next(f)` |

## Practice Problems

1. Find all lines in a file that start with a specific letter
2. Count how many lines are longer than 50 characters
3. Read a marks file and find the student with the highest total
4. Parse a log file and find the peak usage hour
5. Replace all occurrences of a word and save the result to a new file

In [15]:
# 1. Lines starting with specific letter (case-sensitive check on the raw line)
lines_with_i = []
with open('data/search_demo.txt') as demo_file:
    for row in demo_file:
        if row.startswith('I'):
            lines_with_i.append(row.strip())
print(f"1. Lines starting with 'I': {lines_with_i}")

# 2. Lines longer than 30 chars (using 30 for demo)
# Length is measured after strip(), so the trailing newline is not counted.
long_lines = 0
with open('data/search_demo.txt') as demo_file:
    for row in demo_file:
        if len(row.strip()) > 30:
            long_lines += 1
print(f"2. Lines > 30 chars: {long_lines}")

# 3. Student with highest total
# best_total starts as None rather than 0 so that the first student always
# becomes the initial leader, even when every total is 0. On a tie the
# student who appears first in the file is kept.
best_student = ''
best_total = None
with open('data/marks.txt') as f:
    for line in f:
        parts = line.split()
        if not parts:
            continue  # tolerate blank lines
        name = parts[0]
        total = sum(int(m) for m in parts[1:])
        if best_total is None or total > best_total:
            best_total = total
            best_student = name

if best_total is None:
    # Nothing was read, so do not report a misleading "with 0" result.
    print("3. Top student: no records found")
else:
    print(f"3. Top student: {best_student} with {best_total}")

# 4. Peak usage hour (most logins)
# Only the hour part of each session's start time is counted.
with open('data/users.txt') as log_file:
    login_hours = [entry.strip().split(',')[1].split(':')[0] for entry in log_file]

hours = {}
for login_hour in login_hours:
    hours[login_hour] = hours.get(login_hour, 0) + 1

# On a tie, max() keeps the hour that was inserted first.
peak = max(hours, key=hours.get)
print(f"4. Peak hour: {peak}:00 with {hours[peak]} logins")

# 5. Replace word and save
# The whole file is read into memory, rewritten, and stored under a new name;
# the source file itself is not modified.
with open('data/search_demo.txt') as fin:
    original_text = fin.read()

content = original_text.replace('Python', 'PYTHON')

with open('data/replaced.txt', 'w') as fout:
    fout.write(content)
print("5. Created data/replaced.txt with 'Python' -> 'PYTHON'")

1. Lines starting with 'I': ['I love Python.']
2. Lines > 30 chars: 0
3. Top student: Alice with 90
4. Peak hour: 09:00 with 1 logins
5. Created data/replaced.txt with 'Python' -> 'PYTHON'
