# We learned about lists, sets, dictionaries, tuples, and strings. 

# However, what we have covered so far was only the basic operations on those data structures.


# We will now learn about advanced operations and manipulations and use fundamental data structures to represent more complex and higher-level data structures; this is often handy for doing data science in real life. 

# These higher-level topics will include iterators, stacks, lambda functions, queues, and file operations.

# 1.
# Iterators in Python are very useful when dealing with data as they allow you to process the data one unit at a time. Iterators are stateful: an iterator remembers its current position, so it always knows which element to hand out next. An iterator is an object that implements the `__next__` method (invoked through the built-in `next()` function); iterators can be obtained from collections such as lists, tuples, dictionaries, and more. Practically, this means that each time we call `next()` on the iterator, it gives us the next element from the collection; if there is no further element, it raises a StopIteration exception.


In [5]:
from itertools import permutations, combinations, dropwhile, repeat, zip_longest

#permutations?
#combinations?
#repeat?
#zip_longest?

In [13]:
# DIY 1
# Explore each function mentioned above from itertools

In [18]:
# Build a real list holding ten million references to the integer 1.
# Every slot of the list exists in memory up front; the size of this list
# object is what the getsizeof() cell further below measures.
large_list_of_numbers = [1 for x in range (0, 10000000)]
# A bare name on the last line of a notebook cell displays its value.
large_list_of_numbers

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,


In [24]:
from sys import getsizeof

# getsizeof() reports a size in bytes, and it is shallow: it measures the
# list object itself, not the objects the list refers to.
mb = 1024 * 1024  # number of bytes in one MB (1048576)
list_size_mb = round(getsizeof(large_list_of_numbers) / mb, 3)
print('The list is of: {} MB'.format(list_size_mb))

The list is of: 84.968 MB


In [27]:
# itertools.repeat() hands out the same value on demand instead of storing
# ten million copies of it, so the object itself needs far less memory.
small_list_of_numbers = repeat(1, times=10000000)
iterator_size_mb = round(getsizeof(small_list_of_numbers) / mb, 5)
print('The list is of: {} MB'.format(iterator_size_mb))

The list is of: 5e-05 MB


In [32]:
# Did we get the list of numbers using repeat?
print ('We obtained {} type and not the actual list'.format(type(small_list_of_numbers)))

print ('You will get the list items only when you call it one by one. This is called as the lazy approach.')

# enumerate() supplies a running counter so the loop can stop early instead
# of walking through all ten million values.
for i,item in enumerate(small_list_of_numbers): 
    print (item)
    # i starts at 0 and the check runs after printing, so 22 values are shown.
    if i > 20:
        break
print ('enumerate is simply used to not print all values just for convenience')

We obtained <class 'itertools.repeat'> type and not the actual list
You will get the list items only when you call it one by one. This is called as the lazy approach.
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
enumerate is simply used to not print all values just for convenience


# Stacks

# A stack is a very useful data structure. 
# If you know a bit about CPU internals and how a program gets executed, then you will know that a stack is present in many such cases. 
# It is simply a list with one restriction, Last In First Out (LIFO), meaning an element that comes in last goes out first when a value is read from a stack. 

In [53]:
# Build the stack by pushing the athletes one at a time. list.append() plays
# the role of "push", so the name appended last ends up on top of the stack.
paris2024_stack = []
for athlete in (
    'Neeraj Chopra',
    'Rohan Bopanna',
    'Dhinidhi Desinghu',
    'Bhowneesh Mendiratta',
    'Rudrankksh Patil',
    'Priyanka Goswami',
):
    paris2024_stack.append(athlete)
paris2024_stack

['Neeraj Chopra',
 'Rohan Bopanna',
 'Dhinidhi Desinghu',
 'Bhowneesh Mendiratta',
 'Rudrankksh Patil',
 'Priyanka Goswami']

In [54]:
# Let's read a value from our stack using the pop method.
# pop() returns the element stored at the current last index of the list
# and removes that element from the list, so the list becomes one item shorter.
# (tos: presumably short for "top of stack".)
tos = paris2024_stack.pop()
tos

'Priyanka Goswami'

In [55]:
# Since the last value was removed from the list, a newly appended value
# takes its place at the end of the list, i.e. on top of the stack.
paris2024_stack.append('Murali Sreeshankar')
paris2024_stack

['Neeraj Chopra',
 'Rohan Bopanna',
 'Dhinidhi Desinghu',
 'Bhowneesh Mendiratta',
 'Rudrankksh Patil',
 'Murali Sreeshankar']

In [57]:
def stack_pop(incoming_list):
    """Drop the top (last) element of a list-based stack and return the list.

    The list is modified in place, and the object returned is the very same
    list that was passed in, so the caller's list is shortened as well.
    Raises IndexError when the list is empty.
    """
    incoming_list.pop(-1)  # index -1 is the top of the stack
    return incoming_list

# stack_pop() returns the same list object it was given, so both lines print
# the same, already shortened, stack.
print (stack_pop(paris2024_stack))
print (paris2024_stack)

['Neeraj Chopra', 'Rohan Bopanna', 'Dhinidhi Desinghu', 'Bhowneesh Mendiratta']
['Neeraj Chopra', 'Rohan Bopanna', 'Dhinidhi Desinghu', 'Bhowneesh Mendiratta']


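# Look closely. Because the function changes the list it receives in place, the caller's list changes as well: both names refer to the same mutable object. This is often loosely called 'pass by reference' in Python; more precisely, Python passes references to objects ('pass by object reference'), so in-place changes to a mutable argument are visible to the caller, whereas rebinding the parameter name inside the function is not.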

In [64]:
# notice that original value does not change
def change_value_immutable(val: str) -> str:
    """Return ``val`` with the letter 'b' appended.

    For an immutable argument such as a string, ``+=`` creates a new object
    and rebinds only the local name ``val``; the caller's object is untouched.
    """
    val += 'b'
    return val

main_val = 'alok'
# The first print shows the new string returned by the function; the second
# shows that main_val itself is unchanged.
print (change_value_immutable(main_val))
print (main_val)

#############################

# notice that original value changes
def change_value_mutable(val: list) -> list:
    """Append 88 to ``val`` in place and return the same list object.

    Because the list is modified in place, the caller's list changes too.
    """
    val.append(88)
    return val

main_val = [1505]
# Both prints show the same contents: the function changed main_val itself.
print (change_value_mutable(main_val))
print (main_val)



alokb
alok
[1505, 88]
[1505, 88]


# Lambda Functions

# In general, it is not a good idea to change a variable's value inside a function.
# Any variable that is passed to the function should be considered and treated as immutable. 
# This is close to the principles of functional programming. 
# To follow this style, we can use small unnamed (anonymous) functions that only compute and return a value from their arguments, and that are typically not stored in a variable.

# Such an expression or function, called a <span style="color:red"> lambda expression </span> in Python, is a way to construct one-line, nameless functions that are, by convention, side-effect-free and are loosely considered as implementing functional programming

# expression of lambda
# lambda x: do something with x

In [75]:
# A lambda is an expression that evaluates to a function, so it can be bound
# to a name and then called like any other function.
multiply = lambda x, y: x * y
print(multiply(2, 3))

6


In [77]:
numbers = [1, 2, 3, 4, 5]

# map() pairs the lambda with every element of numbers; list() then collects
# the squared values into a list.
squared_iter = map(lambda x: x ** 2, numbers)               # use of map() function
squared = list(squared_iter)

print(squared)

[1, 4, 9, 16, 25]


In [78]:
numbers = [1, 2, 3, 4, 5]

# Here the lambda is defined and called immediately for every x, only to show
# that a lambda can be used inside a comprehension; [x ** 2 for x in numbers]
# gives the same result without the extra function call.
squared = [(lambda x: x ** 2)(x) for x in numbers]            # list comprehension

print(squared)

[1, 4, 9, 16, 25]


In [83]:
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# filter() keeps only the elements for which the lambda returns a true value.
at_most_five = lambda x: x <= 5
binary = list(filter(at_most_five, numbers))
print(binary)

[1, 2, 3, 4, 5]


# we will try to solve below expression using Lambda function 
![Screenshot%202024-07-28%20at%205.35.29%E2%80%AFPM.png](attachment:Screenshot%202024-07-28%20at%205.35.29%E2%80%AFPM.png)

In [71]:
import math

# sine_func() returns a lambda function
def sine_func():
    """Build and return a one-argument function giving the sine of an angle.

    The returned lambda takes the angle in degrees and converts it to radians
    before calling ``math.sin``.
    """
    sine_of_degrees = lambda x: math.sin(math.radians(x))
    return sine_of_degrees

def cosine_func():
    """Build and return a one-argument function giving the cosine of an angle.

    The returned lambda takes the angle in degrees and converts it to radians
    before calling ``math.cos``.
    """
    cosine_of_degrees = lambda x: math.cos(math.radians(x))
    return cosine_of_degrees

# Since sine_func() returns a lambda function, assign the returned function to
# a variable and then call that variable like any other function.
sine = sine_func()
cosine = cosine_func()

# Evaluates sin^2(30 degrees) + cos^2(30 degrees); the two squares add up to 1.
print (math.pow(sine(30),2)+math.pow(cosine(30),2))

1.0


In [73]:
# Recall the sorted() function from the previous notebook
# (1_Basics_ProblemSetSolution.ipynb); here we use it again.
# Each entry below is a (state, capital) tuple.
state_capitals = [
    ("Uttarakhand", "Dehradun"),
    ("Rajasthan", "Jaipur"),
    ("West Bengal", "Kolkata"),
    ("Karnataka", "Bengaluru"),
]
# Task: arrange the states by sorting on the capitals. The key function picks
# element 1 (the capital) of every tuple as the value to sort by.
sorted(state_capitals, key=lambda pair: pair[1])

[('Karnataka', 'Bengaluru'),
 ('Uttarakhand', 'Dehradun'),
 ('Rajasthan', 'Jaipur'),
 ('West Bengal', 'Kolkata')]

# Queue

# Apart from stacks, another high-level data structure type that we are interested in is queues. 

# A queue is like a stack in that you keep adding elements one by one. Unlike a stack, however, the reading of elements obeys the First In First Out (FIFO) strategy: the element that was added first is the first one to be read and removed.

In [96]:
%%time
# Use a plain list as a queue and enqueue 10,000 integers at its right end.
queue = []
for i in range(0, 10000):
    queue.append(i)

print("Queue created")
queue

# The %%time magic reports CPU time (user + sys) and elapsed wall time.
# If wall time is less than total CPU time, it might indicate that your code is using multithreading or multiprocessing.


Queue created
CPU times: user 1.34 ms, sys: 595 µs, total: 1.93 ms
Wall time: 1.42 ms


[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 32,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 52,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 73,
 74,
 75,
 76,
 77,
 78,
 79,
 80,
 81,
 82,
 83,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 91,
 92,
 93,
 94,
 95,
 96,
 97,
 98,
 99,
 100,
 101,
 102,
 103,
 104,
 105,
 106,
 107,
 108,
 109,
 110,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 123,
 124,
 125,
 126,
 127,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 143,
 144,
 145,
 146,
 147,
 148,
 149,
 150,
 151,
 152,
 153,
 154,
 155,
 156,
 157,
 158,
 159,
 160,
 161,
 162,
 163,
 164,
 165,
 166,
 167,
 168,
 169,
 170,
 171,
 172,
 173,
 174,
 175,
 176,
 177,
 178,
 179,
 180,
 181,
 182,
 183,
 184,


In [97]:
%%time
# Dequeue from the front: pop(0) removes and returns the element at index 0.
for i in range(0, 10000):
    queue.pop(0)

print("Queue emptied")
queue

# It takes this amount of time because of the pop(0) operation:
# every time we pop a value from the left of the list (the current 0 index),
# Python has to rearrange all the other elements of the list by shifting them one
# space left. This is not an efficient way to implement a queue.

Queue emptied
CPU times: user 9.89 ms, sys: 436 µs, total: 10.3 ms
Wall time: 10.1 ms


[]

In [104]:
%%time
from collections import deque
# Implement the same queue using the deque data structure from Python's
# collections package, which is designed for adding and removing items at
# both ends.
queue2 = deque()
for i in range(0, 10000):
    queue2.append(i)
    
print("Queue created")
# The next cell empties this deque from the left so that its timing can be
# compared with the list-based queue.


Queue created
CPU times: user 1.43 ms, sys: 501 µs, total: 1.93 ms
Wall time: 1.62 ms


In [105]:
%%time
# popleft() removes items from the front of the deque; compare the reported
# time with the list.pop(0) cell, which needed noticeably longer.
for i in range(0, 10000):
    queue2.popleft()
print("Queue emptied")

Queue emptied
CPU times: user 1.51 ms, sys: 769 µs, total: 2.28 ms
Wall time: 1.63 ms


In [None]:
# We have not covered linked lists, trees, graphs in this course. 

# Basic File Operations

# We will learn about a few OS-level functions in Python used for working with files. 
# We will focus on file-related functions and learn how to open a file, read the data line by line or all at once,
# and finally, how to close the file we opened. 
# Closing a file is an important step that must be handled carefully, yet developers often forget to do it.

In [106]:
import os

In [110]:
# os.environ holds the environment variables of the current process.
# NOTE: printing it in full can reveal sensitive values (tokens, keys, paths),
# so avoid doing this in notebooks that are shared or published.
print (os.environ)

environ({'TERM_PROGRAM': 'iTerm.app', 'TERM': 'xterm-color', 'SHELL': '/bin/bash', 'TMPDIR': '/var/folders/5g/0yr9vjys1xb_dx5d7p39f5dc0000gn/T/', 'CONDA_SHLVL': '2', 'CONDA_PROMPT_MODIFIER': '(eda_env) ', 'TERM_PROGRAM_VERSION': '3.4.23', 'GSETTINGS_SCHEMA_DIR_CONDA_BACKUP': '', 'OLDPWD': '/Users/alokbhardwaj', 'TERM_SESSION_ID': 'w0t0p0:6048EFFA-A4CF-4A6C-BC48-B75D1B47D63A', 'USER': 'alokbhardwaj', 'COMMAND_MODE': 'unix2003', 'CONDA_EXE': '/Users/alokbhardwaj/opt/anaconda3/bin/conda', 'SSH_AUTH_SOCK': '/private/tmp/com.apple.launchd.LJy5uZjOVL/Listeners', '__CF_USER_TEXT_ENCODING': '0x1F5:0x0:0x2', '_CE_CONDA': '', 'CONDA_PREFIX_1': '/Users/alokbhardwaj/opt/anaconda3', 'PATH': '/Users/alokbhardwaj/opt/anaconda3/envs/eda_env/bin:/Users/alokbhardwaj/opt/anaconda3/condabin:/Users/alokbhardwaj/anaconda3/bin:/usr/local/bin:/System/Cryptexes/App/usr/bin:/usr/bin:/bin:/usr/sbin:/sbin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/local/bin:/var/run/com.apple.security.cryptex

In [111]:
print(os.getenv('PATH'))

/Users/alokbhardwaj/opt/anaconda3/envs/eda_env/bin:/Users/alokbhardwaj/opt/anaconda3/condabin:/Users/alokbhardwaj/anaconda3/bin:/usr/local/bin:/System/Cryptexes/App/usr/bin:/usr/bin:/bin:/usr/sbin:/sbin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/local/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/bin:/var/run/com.apple.security.cryptexd/codex.system/bootstrap/usr/appleinternal/bin:/opt/X11/bin:/Library/Apple/usr/bin


In [113]:
# Look up a single environment variable by name; os.getenv() returns None when
# the variable is not defined (PWD is normally set by the shell).
print(os.getenv('PWD'))

/Users/alokbhardwaj/Library/CloudStorage/Dropbox/alok/IITR_Civil/Courses/2024/autumn/dai_101/lectures


# Environment variables play a crucial role in the operating system
# 1. Configuration: they configure the behavior of the operating system and applications without changing code. For example, the PWD variable records the current working directory, and the PATH variable lists the directories that are searched for executables.
# 2. Security: used to store sensitive information, such as API keys and passwords, securely. Applications can access these values at runtime without hardcoding them in the source code.
# 3. Portability: You can port your code across development, testing, production phase by changing the configuration settings through environment variables rather than code changes.
# environment variables can make your applications more flexible, secure, and easier to manage across different environments.

In [115]:
# You can access all environment variables the same way you would iterate
# over a dictionary: looping over os.environ yields the variable names (keys).
for key in os.environ: 
    print (key)

TERM_PROGRAM
TERM
SHELL
TMPDIR
CONDA_SHLVL
CONDA_PROMPT_MODIFIER
TERM_PROGRAM_VERSION
GSETTINGS_SCHEMA_DIR_CONDA_BACKUP
OLDPWD
TERM_SESSION_ID
USER
COMMAND_MODE
CONDA_EXE
SSH_AUTH_SOCK
__CF_USER_TEXT_ENCODING
_CE_CONDA
CONDA_PREFIX_1
PATH
GSETTINGS_SCHEMA_DIR
LaunchInstanceID
CONDA_PREFIX
__CFBundleIdentifier
PWD
ITERM_PROFILE
XPC_FLAGS
_CE_M
XPC_SERVICE_NAME
SHLVL
HOME
COLORFGBG
LC_TERMINAL_VERSION
ITERM_SESSION_ID
CONDA_PYTHON_EXE
LOGNAME
LC_CTYPE
CONDA_DEFAULT_ENV
LC_TERMINAL
DISPLAY
SECURITYSESSIONID
SQLITE_EXEMPT_PATH_FROM_VNODE_GUARDS
COLORTERM
_
PYDEVD_USE_FRAME_EVAL
JPY_PARENT_PID
CLICOLOR
FORCE_COLOR
CLICOLOR_FORCE
PAGER
GIT_PAGER
MPLBACKEND


# File Opening
# We will learn about how to open a file in Python. 
# Python has a built-in open function that we will use to open a file. 
# Python uses underlying system-level calls to open the file and returns a file handle (a file object) to the caller.

![Screenshot%202024-07-29%20at%207.23.32%E2%80%AFAM.png](attachment:Screenshot%202024-07-29%20at%207.23.32%E2%80%AFAM.png)

In [131]:
file1 = open('datasets/Table_EX_1.txt')            # by default, the file is opened in 'rt' mode (read-only, text)

In [132]:
file2 = open('datasets/Table_EX_1.txt','rb')   # pass 'rb' to open the file read-only in binary mode

In [133]:
print (file1)
print (file2)

<_io.TextIOWrapper name='datasets/Table_EX_1.txt' mode='r' encoding='UTF-8'>
<_io.BufferedReader name='datasets/Table_EX_1.txt'>


In [134]:
# Open a file for writing.
# WARNING - read this before running the command below.
# The mode 'w' stands for "write", and it behaves as follows:
# - If the file does not exist, it creates a new file.
# - If the file does exist, it truncates the file (i.e., deletes all its contents) before writing to it.

file3 = open('datasets/sample_Table_EX_1.txt','w')
file3

<_io.TextIOWrapper name='datasets/sample_Table_EX_1.txt' mode='w' encoding='UTF-8'>

In [135]:
# It is important to close every file that was opened: file1, file2 and file3
# were opened with plain open() calls, so each one must be closed explicitly.
file1.close()
file2.close()
file3.close()

# The with statement for opening files
# The with statement works together with a context manager. A file opened in a with statement is closed automatically when the block ends, so you cannot forget to close it.
# The main purpose of a context manager is to ensure that resources are properly managed, which means that resources are properly acquired and released regardless of whether an error occurs within the block of code.

In [136]:
# file4.closed tells whether the file has been closed; it is checked once
# inside the with block and once after it.
with open("datasets/Table_EX_1.txt") as file4:
    print(file4.closed)
print(file4.closed)

# If we execute the preceding code, the first print shows False,
# whereas the second one shows True.
# This means that as soon as control leaves the with block, the file is closed automatically.

False
True


In [137]:
# Iterating over a file object yields its content one line at a time.
with open("datasets/Table_EX_1.txt") as file5:
    for line in file5:
        # Each line still carries its own trailing newline and print() adds
        # another one, which is why the output is double-spaced;
        # print(line, end='') would avoid the blank lines.
        print(line)

One evening when he was sitting with his son in his lap, shelling peas, he

saw a nock of sheep coming towards him. He said to himself, 'The sheep

path doesn't come that way. Can't those sheep go along the bank? What's the

idea, coming over here? They'll trample and gobble up the crop. I bet it's

Buddhu the shepherd-- just look at his nerve! He can see me here but he

won't drive his sheep back. What good will it do me to put up with this?


In [146]:
# Let's open a file in write mode ("w") and write one sentence per
# (state, capital) pair; write() adds no newline by itself, hence the '\n'.
with open("datasets/sample_Table_EX_1.txt", "w") as file6:
    for state, capital in state_capitals:
        file6.write('The capital of {} is {}\n'.format(state, capital))

# Read the file back line by line to check what was written.
with open("datasets/sample_Table_EX_1.txt",'r') as file7:
    for line in file7: 
        print (line)

The capital of Uttarakhand is Dehradun

The capital of Rajasthan is Jaipur

The capital of West Bengal is Kolkata

The capital of Karnataka is Bengaluru



In [147]:
# Let's append to the file: mode "a" keeps the existing content and adds the
# new text at the end of the file.
with open("datasets/sample_Table_EX_1.txt", "a") as file8:
    file8.write('The capital of {} is {}\n'.format('Haryana', 'Chandigarh'))
    
# Re-open the file for reading (the name file8 is reused) and print every line.
with open("datasets/sample_Table_EX_1.txt",'r') as file8:
    for line in file8: 
        print (line)

The capital of Uttarakhand is Dehradun

The capital of Rajasthan is Jaipur

The capital of West Bengal is Kolkata

The capital of Karnataka is Bengaluru

The capital of Haryana is Chandigarh



In [148]:
# Let's append to the file again, this time with print(): the file= argument
# sends the text to the file instead of the screen, and print() supplies the
# trailing newline itself.
with open("datasets/sample_Table_EX_1.txt", "a") as file9:
    print ('The capital of {} is {}'.format('Madhya Pradesh', 'Bhopal'),file=file9) # note change in command and removal of \n in print statement
    
# Re-open the file for reading (the name file9 is reused) and print every line.
with open("datasets/sample_Table_EX_1.txt",'r') as file9:
    for line in file9: 
        print (line)

The capital of Uttarakhand is Dehradun

The capital of Rajasthan is Jaipur

The capital of West Bengal is Kolkata

The capital of Karnataka is Bengaluru

The capital of Haryana is Chandigarh

The capital of Madhya Pradesh is Bhopal



In [None]:
# DIY2
# Create a file 'sample_IITR_buildings'. Add four names of buildings and their functions in IIT Roorkee

In [16]:
# Solutions to DIY
# DIY 1
# permutations() treats the characters as distinct by position, so the two
# 'I' characters of 'IIT' lead to repeated strings in the output.
example_string = 'IIT'
perms = permutations(example_string)
for perm in perms:
    print(''.join(perm))
    
print ('*'*20)

# combinations(..., 2) yields every selection of two elements, in input order
# and without pairing an element with itself.
example_numbers = [1, 2, 3]
combs = combinations(example_numbers, 2)
for comb in combs:
    print (comb)
    
print ('*'*20)

# repeat('A', 5) yields 'A' exactly five times.
repeat_values  = repeat('A', 5)
for val in repeat_values:
    print(val)
    
print ('*'*20)

# zip_longest() pairs items position by position up to the longer input and
# pads the shorter one ('TREE') with fillvalue.
val1 = 'TREE'
val2 = 'WATER'
result = zip_longest(val1, val2, fillvalue='*')

for item in result:
    print(item)

IIT
ITI
IIT
ITI
TII
TII
********************
(1, 2)
(1, 3)
(2, 3)
********************
A
A
A
A
A
********************
('T', 'W')
('R', 'A')
('E', 'T')
('E', 'E')
('*', 'R')


In [150]:
# DIY2
# Map each building name to the purpose it serves.
buildings_IITR = {'JT Building':'Administration','MFS Department':'Academic','LBS Ground':'Sports','Jawahar Bhavan':'Student Accomodation'}

# Open the file in write mode ("w") and write one sentence per building.
with open("datasets/sample_IITR_buildings.txt", "w") as file10:
    for name, purpose in buildings_IITR.items():
        file10.write('The purpose of {} is {}\n'.format(name, purpose))

# Read the file back line by line; each line keeps its own '\n' and print()
# adds another newline, hence the blank lines in the output.
with open("datasets/sample_IITR_buildings.txt",'r') as file10:
    for line in file10: 
        print (line)

The purpose of JT Building is Administration

The purpose of MFS Department is Academic

The purpose of LBS Ground is Sports

The purpose of Jawahar Bhavan is Student Accomodation

