### 3.1 Data Structures and Sequences

#### Tuple
fixed-length, immutable

In [32]:
# Ways to create a tuple
tup1 = (1, 2, 3, 4)
tup2 = 4, 5, 6
tup3 = tuple("Hello")

print(tup1, tup2, tup3, sep = "\n")

(1, 2, 3, 4)
(4, 5, 6)
('H', 'e', 'l', 'l', 'o')


In [33]:
# Accessing elements
print(tup1[0])

1


In [34]:
# Nested tuples
nested_tup = (4, 5, 6), (7, 8)
print(nested_tup[0], nested_tup[0][0], sep="\n")

(4, 5, 6)
4


In [35]:
# Tuples do not support item assignment: rebinding a slot raises TypeError
tup = ("hi", [1, 2], True)
print(tup)

try:
    tup[0] = False
except TypeError:
    print("A TypeError exception occured.")

('hi', [1, 2], True)
A TypeError exception occured.


In [36]:
# if an object of a tuple is mutable, it can be modified in place
tup[1].append(3)
tup

('hi', [1, 2, 3], True)

In [37]:
# Concatenation
tup1 = 1, 2
tup2 = 3, 4, 5
tup3 = tup1 + tup2

tup3

(1, 2, 3, 4, 5)

##### *Unpacking Tuples*

In [39]:
tup = 1, 2, 3
a, b, c = tup

print(a, b, c)

1 2 3


In [40]:
tup = 1, 2, (3, 4, 5)
a, b, c = tup
c

(3, 4, 5)

In [41]:
tup = (1, 2, 3), (4, 5, 6), (7, 8, 9)
for a, b, c in tup:
    print(f"a={a}, b={b}, c={c}")

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


In [43]:
vals = 1, 2, 3, 4, 5, 6, 7
# Star-unpacking: a and b take the first two values, rest collects everything left over as a list
a, b, *rest = vals       # name the starred target _ (a, b, *_ = vals) to signal the remainder is unused
rest

[3, 4, 5, 6, 7]

In [45]:
# Tuple Methods
tup = 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 6, 7, 3
tup.count(3)

7

#### List
mutable, variable length

In [48]:
# Creating lists
list1 = [1, 2, 3, 4]
tup = 1, 2
list2 = list(tup)

print(list1, list2, list1[0], sep="\n")

[1, 2, 3, 4]
[1, 2]
1


In [58]:
# range() returns a lazy, immutable sequence (not a generator); list() materializes its values
number_range = range(0, 10)
list1 = list(number_range)
list1

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [59]:
# Add and Remove
list1.append("last")
print(list1)

list1.insert(0,"first")
print(list1)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 'last']
['first', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 'last']


In [60]:
list1.pop(-1)
print(list1)

['first', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [62]:
list1.remove(0)
list1

['first', 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [63]:
print("first" in list1)
list1.remove("first")
print("first" in list1)

True
False


In [64]:
# Concatenation and combining lists
list1 = [1, 2, 3, 4]
list2 = [5, 6, 7, 8] 

list1.extend(list2)
list1

[1, 2, 3, 4, 5, 6, 7, 8]

In [66]:
list_of_lists = [[1, 2], [3, 4, 5], [6]]

# Flatten one level: walk each inner list and collect its elements in order
list1 = [element for inner in list_of_lists for element in inner]

list1

[1, 2, 3, 4, 5, 6]

In [67]:
# Sorting
list1 = [4, 5, 2, 1, 9, 22, 45, 12, 14]
list1.sort()
list1

[1, 2, 4, 5, 9, 12, 14, 22, 45]

In [73]:
list1 = ["Hi", "Array", "Data", "slower", "fast", "panda"]
list1.sort()
list1

['Array', 'Data', 'Hi', 'fast', 'panda', 'slower']

In [74]:
list1 = ["Hi", "Array", "Data", "slower", "fast", "panda"]
list1.sort(key=len)
list1

['Hi', 'Data', 'fast', 'Array', 'panda', 'slower']

##### Slicing

In [75]:
seq = [1, 2, 3, 4, 5, 6, 7, 8, 9]
seq[1:4]

[2, 3, 4]

In [76]:
seq[1:4] = [0, 0, 0, 0]
seq

[1, 0, 0, 0, 0, 5, 6, 7, 8, 9]

In [84]:
seq = [1, 2, 3, 4, 5, 6, 7, 8, 9]

print(f"First 3 elements: seq[:3] = {seq[:3]}")
print(f"Fourth element to last: seq[3:] = {seq[3:]}")
print(f"Last 3 elements: seq[-3:] = {seq[-3:]}")
print(f"seq[-4:-1] = {seq[-4:-1]}")

First 3 elements: seq[:3] = [1, 2, 3]
Fourth element to last: seq[3:] = [4, 5, 6, 7, 8, 9]
Last 3 elements: seq[-3:] = [7, 8, 9]
seq[-4:-1] = [6, 7, 8]


In [86]:
print(f"Take every other element: seq[::2] = {seq[::2]}")
print(f"Reverse of a sequence: seq[::-1] = {seq[::-1]}")


Take every other element: seq[::2] = [1, 3, 5, 7, 9]
Reverse of a sequence: seq[::-1] = [9, 8, 7, 6, 5, 4, 3, 2, 1]


#### Dictionary

In [93]:
d1 = {"a": "Hi", "b": [1, 2, 3]}
d1

{'a': 'Hi', 'b': [1, 2, 3]}

In [94]:
d1[3] = "new"
d1

{'a': 'Hi', 'b': [1, 2, 3], 3: 'new'}

In [95]:
"b" in d1

True

In [96]:
del d1[3]
d1

{'a': 'Hi', 'b': [1, 2, 3]}

In [97]:
val = d1.pop("b")
print(d1, val, sep="\n")

{'a': 'Hi'}
[1, 2, 3]


In [102]:
# Keys and Values
d1 = {"a": "Hi", "b": [1, 2, 3], "c": 6}
keys = list(d1.keys())
vals = list(d1.values())

print(f"Keys:   {keys}\nValues: {vals}")

Keys:   ['a', 'b', 'c']
Values: ['Hi', [1, 2, 3], 6]


In [103]:
# iterating over
for key, value in d1.items():
    print(f"Key: {key} --- Value: {value}")

Key: a --- Value: Hi
Key: b --- Value: [1, 2, 3]
Key: c --- Value: 6


In [104]:
# Merging dicts
d1.update({"b": "changed", "c": "new"})
d1

{'a': 'Hi', 'b': 'changed', 'c': 'new'}

##### Creating Dictionaries from Sequences

In [106]:
tuples = zip(range(6), reversed(range(6)))

dict1 = dict(tuples)
dict1

{0: 5, 1: 4, 2: 3, 3: 2, 4: 1, 5: 0}

##### setdefault

In [108]:
# Categorizing a list of words by their first letter by not using setdefault
words = ["alpha", "beta", "delta", "adapt", "ad", "bravo", "char", "echo", "fox"]
by_letter = {}
for word in words:
    letter = word[0]
    if letter not in by_letter:
        by_letter[letter] = []
    by_letter[letter].append(word)
by_letter

{'a': ['alpha', 'adapt', 'ad'],
 'b': ['beta', 'bravo'],
 'd': ['delta'],
 'c': ['char'],
 'e': ['echo'],
 'f': ['fox']}

In [109]:
# Categorizing a list of words by their first letter with setdefault
words = ["alpha", "beta", "delta", "adapt", "ad", "bravo", "char", "echo", "fox"]
by_letter = {}
for word in words:
    letter = word[0]
    by_letter.setdefault(letter, []).append(word)
by_letter

{'a': ['alpha', 'adapt', 'ad'],
 'b': ['beta', 'bravo'],
 'd': ['delta'],
 'c': ['char'],
 'e': ['echo'],
 'f': ['fox']}

In [116]:
# Even easier way: defaultdict(list) creates the empty list the first time a missing key is accessed
from collections import defaultdict

# Redefined here (as in the two cells above) so this cell does not rely on leftover kernel state
words = ["alpha", "beta", "delta", "adapt", "ad", "bravo", "char", "echo", "fox"]
by_letter = defaultdict(list)
for word in words:
    by_letter[word[0]].append(word)
by_letter

defaultdict(list,
            {'a': ['alpha', 'adapt', 'ad'],
             'b': ['beta', 'bravo'],
             'd': ['delta'],
             'c': ['char'],
             'e': ['echo'],
             'f': ['fox']})

##### Valid dictionary key types
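Keys must be hashable, which in practice means immutable objects (strings, numbers, or tuples containing only hashable items)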

In [123]:
# Strings, ints and tuples made only of hashable items can be hashed.
# The value printed for the string changes between interpreter runs (hash randomization).
print(hash("string"), hash(2), hash((1, 2, 3, (4, 5,))), sep="\n")

# A tuple is hashable only if every element is; the inner list is mutable, so hashing fails
try:
    print(hash((1, 2, 3, [4, 5])))
except TypeError:
    print("TypeError occured since the tuple contains a mutable element (a list)")

# Lists are mutable and therefore never hashable
try:
    print(hash([1, 2, 3]))
except TypeError:
    print("TypeError occured since lists are mutable")

6855552267352188548
2
-6766371705598827417
TypeError occured since tuple has immutable elements
TypeError occured since lists are mutable


In [124]:
# Convert list to tuple to be able to use it as a key of a dict
list1 = [1, 2, 3]
tup = tuple(list1)

d1 = {}
d1[tup] = "first"
d1

{(1, 2, 3): 'first'}

#### Set
an unordered collection of unique elements

In [1]:
# Creating sets
a = set([1, 2, 3, 4])
b = {4, 5, 6, 7}

print(a, b, sep="\n")

{1, 2, 3, 4}
{4, 5, 6, 7}


In [2]:
# Mathematical Set Operations
print(f"Union of sets: a.union(b) = {a.union(b)}")
print(f"Union of sets: a|b = {a|b}", end="\n\n")

print(f"Intersection of sets: a.intersection(b) = {a.intersection(b)}")
print(f"Intersection of sets: a & b = {a & b}")

# The method forms (union(), intersection()) accept any iterable and convert it to a set first;
# the operator forms (|, &) require both operands to already be sets.

Union of sets: a.union(b) = {1, 2, 3, 4, 5, 6, 7}
Union of sets: a|b = {1, 2, 3, 4, 5, 6, 7}

Intersection of sets: a.intersection(b) = {4}
Intersection of sets: a & b = {4}


In [3]:
c = a.copy()
c |= b
print(c)

d = a.copy()
d &= b
print(d)

{1, 2, 3, 4, 5, 6, 7}
{4}


In [6]:
set1 = {1, 2, 3, 4, 5}
print(set1.issubset({1,2,4,5,3,11,12,13}))
print(set1.issuperset({1,2}))

True
True


#### Built-in Sequence Functions

In [11]:
# enumerate
nums = [10,21,34,54,23,111]
for index, val in enumerate(nums):
    print(f"Index: {index}, Value: {val}")
print("--------------------")
for index, val in enumerate(nums, start=1):
    print(f"Index: {index}, Value: {val}")

Index: 0, Value: 10
Index: 1, Value: 21
Index: 2, Value: 34
Index: 3, Value: 54
Index: 4, Value: 23
Index: 5, Value: 111
--------------------
Index: 1, Value: 10
Index: 2, Value: 21
Index: 3, Value: 34
Index: 4, Value: 54
Index: 5, Value: 23
Index: 6, Value: 111


In [13]:
# Sorted
print(sorted([1,22,2,333,4,5555,6,777777,0]))

print(sorted("data structures"))

[0, 1, 2, 4, 6, 22, 333, 5555, 777777]
[' ', 'a', 'a', 'c', 'd', 'e', 'r', 'r', 's', 's', 't', 't', 't', 'u', 'u']


In [5]:
# Zip
seq1 = ["a", "b", "c"]
seq2 = [1, 2, 3]

zipped = zip(seq1, seq2)
print(list(zipped))

seq3 = [True, False]
zipped_ = zip(seq1, seq2, seq3)   # shortest seq determines the num of pairs
print(list(zipped_))

[('a', 1), ('b', 2), ('c', 3)]
[('a', 1, True), ('b', 2, False)]


In [8]:
# Sample use of zip
for index, (letter, num) in enumerate(zip(seq1, seq2)):
    print(f"{index}: {letter}, {num}")

0: a, 1
1: b, 2
2: c, 3


In [11]:
# reversed() returns a lazy iterator (not a list), so wrap it in list() to materialize the values
list(reversed(range(10)))

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

##### List, Set and Dict Comprehensions

In [None]:
seq = [1, 2, 3, 4, 5, 6, 7]

# Explicit loop: skip odd values and collect the squares of the even ones
result = []
for number in seq:
    if number % 2 != 0:
        continue
    result.append(number ** 2)
print(result)

# The same computation expressed as a list comprehension
print([n ** 2 for n in seq if n % 2 == 0])

[4, 16, 36]
[4, 16, 36]


In [22]:
# Set comprehension
{x**2 for x in seq if(x % 2 ==0)}

{4, 16, 36}

In [23]:
# Dict comprehension
{index: value for index, value in enumerate(seq) if(value % 2 == 0)}

{1: 2, 3: 4, 5: 6}

In [30]:
# Nested list comprehension
tuples = [(1, 2, 3), (4, 5), (6, 7, 8)]
[x for tup in tuples for x in tup]

[1, 2, 3, 4, 5, 6, 7, 8]

In [31]:
[[x for x in tup] for tup in tuples]

[[1, 2, 3], [4, 5], [6, 7, 8]]

In [32]:
%reset -f

### 3.2 Functions

In [34]:
a = []
def func():
    """Append the integers 0 through 4 to the module-level list ``a``."""
    # ``a`` is mutated in place and never rebound, so no ``global`` statement is needed
    a.extend(range(5))

# Each call keeps growing the same list: functions can modify objects from the enclosing scope
func()
print(a)
func()
print(a)

[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4, 0, 1, 2, 3, 4]


In [51]:
# function to calculate 4 mathematical operations
def calc(num1, num2):
    """Describe the four basic arithmetic operations applied to num1 and num2.

    Returns a 5-tuple of strings: a header line followed by the addition,
    subtraction, multiplication and division results, in that order.

    Raises ZeroDivisionError when num2 is zero (true division is used).
    """
    addition = num1 + num2
    subtraction = num1 - num2
    multiplication = num1 * num2
    division = num1 / num2
    return (
        f"Operations on {num1} and {num2}:\n---------",
        f"addition: {addition}",
        f"subtraction: {subtraction}",
        f"multiplication: {multiplication}",
        f"division: {division}",
    )

# Multiple return values arrive as one tuple and can be unpacked directly
info, a, b, c, d = calc(5, 4)
print(info, a, b, c, d, sep="\n", end="\n\n")

# Keep the whole tuple instead; the variable must not be called `tuple`,
# because that would shadow the built-in type for the rest of the session.
results = calc(5, 6)
for val in results:
    print(val)

Operations on 5 and 4:
---------
addition: 9
subtraction: 1
multiplication: 20
division: 1.25

Operations on 5 and 6:
---------
addition: 11
subtraction: -1
multiplication: 30
division: 0.8333333333333334


In [56]:
import re

langs = ["PyThon", "JavA!#", " Cpp  ", "R"]

def clean(to_clean):
    """Return a new list with a cleaned version of every string in ``to_clean``.

    Each string has surrounding whitespace stripped, the characters ``!``,
    ``#`` and ``?`` removed, and is then converted to title case.
    """
    return [re.sub("[!#?]", "", raw.strip()).title() for raw in to_clean]

clean(langs)

['Python', 'Java', 'Cpp', 'R']

In [57]:
def remove_punct(val):
    """Return ``val`` with every ``!``, ``#`` and ``?`` character removed."""
    unwanted = re.compile("[!#?]")
    return unwanted.sub("", val)

clean_ops = [str.strip, remove_punct, str.title]

def clean(to_clean, ops):
    """Apply every callable in ``ops``, in order, to each item of ``to_clean``.

    Each callable receives the result of the previous one. Returns a new list
    holding the final value obtained for every item.
    """
    def _run_pipeline(value):
        # Feed the value through each operation, left to right
        for step in ops:
            value = step(value)
        return value

    return [_run_pipeline(item) for item in to_clean]

clean(langs, clean_ops)

['Python', 'Java', 'Cpp', 'R']

##### Lambda Functions

In [63]:
def apply_func(seq, func):
    """Return a list containing ``func(item)`` for every item of ``seq``, in order."""
    mapped = []
    for item in seq:
        mapped.append(func(item))
    return mapped

ints = [0, 1, 2, 3, 4]

apply_func(ints, lambda x: x ** 2)

[0, 1, 4, 9, 16]

##### Generators

In [69]:
def squares(n = 10):
    """Generator yielding the squares of 1 through ``n``.

    The announcement line is printed when the first value is requested,
    not when the generator object is created.
    """
    print(f"Generating squares from 1 to {n ** 2}:")
    yield from (base ** 2 for base in range(1, n + 1))
gen = squares(4)
print(list(gen))

gen = squares()
for x in gen:
    print(x, end=" ")

Generating squares from 1 to 16:
[1, 4, 9, 16]
Generating squares from 1 to 100:
1 4 9 16 25 36 49 64 81 100 

In [71]:
# Generator expressions
gen = (x ** 2 for x in range(1, 11))
list(gen)

[1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

In [73]:
sum(x ** 2 for x in range(100))

328350

In [76]:
dict((i, i **2) for i in range(10))

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81}

In [83]:
# itertools
import itertools

def first_digit(x):
    """Return ``x`` divided by 10, truncated to an int.

    For the two-digit numbers used below this is the leading (tens) digit.
    """
    return int(x / 10)

# range() is already iterable; list() materializes it without a pass-through comprehension
ints = list(range(10, 100))

# groupby only merges *consecutive* items that share a key, so the input must
# already be ordered by that key (true here: ints is ascending).
for digit, nums in itertools.groupby(ints, first_digit):
    print(digit, list(nums), sep=": ")


1: [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
2: [20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
3: [30, 31, 32, 33, 34, 35, 36, 37, 38, 39]
4: [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
5: [50, 51, 52, 53, 54, 55, 56, 57, 58, 59]
6: [60, 61, 62, 63, 64, 65, 66, 67, 68, 69]
7: [70, 71, 72, 73, 74, 75, 76, 77, 78, 79]
8: [80, 81, 82, 83, 84, 85, 86, 87, 88, 89]
9: [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


### 3.3 Files and the Operating System

In [112]:
path = r"../examples/file1.txt"

# The with-block closes the file even if reading raises part-way through
with open(path, encoding="utf-8") as f:
    for line in f:
        # each line keeps its own trailing newline, so suppress the one print() adds
        print(line, end="")

Ömer Yaslıtaş
Sample txt file.
For python for 


data science practice repo
chapter 3


section 3
topic is file handling
...........

In [114]:
# The with-block guarantees the handle is closed even if read() or tell() raises
with open(path, encoding="utf-8") as f:
    print(f.read(14))   # text mode: the argument counts characters, not bytes
    print(f.tell())     # current stream position after the read

Ömer Yaslıtaş

18


In [121]:
# Handling blank lines
path2 = "../examples/tmp.txt"

# Open source and destination together so both handles are closed on exit.
# The encoding is stated explicitly so the non-ASCII text survives on any platform.
with open(path, encoding="utf-8") as src, open(path2, mode="w", encoding="utf-8") as dst:
    # a line holding only the newline character has length 1 and is dropped
    dst.writelines(line for line in src if len(line) > 1)

with open(path2, encoding="utf-8") as f:
    for line in f:
        print(line, end="")


Ömer Yaslıtaş
Sample txt file.
For python for 
data science practice repo
chapter 3
section 3
topic is file handling
...........

In [122]:
# Binary mode returns raw bytes: no decoding and no newline translation is applied
with open(path, mode="rb") as f:
    data = f.read(18)   # in "rb" mode the argument counts bytes, not characters
print(data)
# decode() raises UnicodeDecodeError if the bytes read stop in the middle of a multi-byte character
print(data.decode("utf-8"))

b'\xc3\x96mer Yasl\xc4\xb1ta\xc5\x9f\r\n'
Ömer Yaslıtaş



In [123]:
import os

# Delete the temporary file; report instead of failing when it is already gone
try:
    os.remove(path2)
except FileNotFoundError:
    print("FileNotFoundError occured.")
else:
    print("Successfuly deleted.")

Successfuly deleted.
