In [None]:
# Filtering and Iterating over Dictionaries -- In Class Oct 17
# Zeke Van Dehy
# Nov 5, 2020

# Let's make an example #

In the cell below, create a dictionary called aamws from the file aamw.txt, as we did last time. We'll use this file in our subsequent examples. MAKE SURE THAT YOU PUT THE MOLECULAR WEIGHTS INTO YOUR DICT AS FLOATS and not as strings. It'll make life easier later.

In [3]:
with open("aamw.txt") as fo:
    aamws = {line.strip().split()[0]:float(line.strip().split()[1]) for line in fo.readlines() if line[0] != "#"}
print(aamws)

{'I': 131.1736, 'L': 131.1736, 'K': 146.1882, 'M': 149.2124, 'F': 165.19, 'T': 119.1197, 'W': 204.2262, 'V': 117.1469, 'R': 174.2017, 'H': 155.1552, 'A': 89.0935, 'N': 132.1184, 'D': 133.1032, 'C': 121.159, 'E': 147.1299, 'Q': 146.1451, 'G': 75.0669, 'P': 115.131, 'S': 105.093, 'Y': 181.1894}


# Checking to see if a key is in the dictionary #

The in operator is the python3 way of checking to see if a key is in a dictionary. 

In [5]:
print("I" in aamws)
print("X" in aamws)

True
False


# .keys() gets the keys #

If you want to access a dictionary via its keys, you can extract just the keys with .keys()

print(my_dict.keys())

Print the list of keys for your amino acid molecular weights dictionary in the cell below.

In [6]:
print(aamws.keys())

dict_keys(['I', 'L', 'K', 'M', 'F', 'T', 'W', 'V', 'R', 'H', 'A', 'N', 'D', 'C', 'E', 'Q', 'G', 'P', 'S', 'Y'])


# .values() gets the values #

Just like .keys(), .values() gets just the values for your dictionary.

Do this below for your amino acid molecular weights dictionary.

In [7]:
print(aamws.values())

dict_values([131.1736, 131.1736, 146.1882, 149.2124, 165.19, 119.1197, 204.2262, 117.1469, 174.2017, 155.1552, 89.0935, 132.1184, 133.1032, 121.159, 147.1299, 146.1451, 75.0669, 115.131, 105.093, 181.1894])


# .items() gets the key:value pairs #

.items() works the same way as the other two methods -- but it gets a list of the paired key/value items in your dictionary. Try this below.

In [8]:
print(aamws.items())

dict_items([('I', 131.1736), ('L', 131.1736), ('K', 146.1882), ('M', 149.2124), ('F', 165.19), ('T', 119.1197), ('W', 204.2262), ('V', 117.1469), ('R', 174.2017), ('H', 155.1552), ('A', 89.0935), ('N', 132.1184), ('D', 133.1032), ('C', 121.159), ('E', 147.1299), ('Q', 146.1451), ('G', 75.0669), ('P', 115.131), ('S', 105.093), ('Y', 181.1894)])


# Answer this question #

What is the list that .items() returns a list of? list of tuples

# What can you do with these objects? #

The returned objects from .keys(), .values() and .items() look a lot like lists. But are they? To see what I mean, try to use one of the list methods that we used to change a list in place.  .sort(), or .reverse.

Try out dict.keys().sort() on your aamws dictionary. What happens? Are these items actually standard lists? Use type() to find out.

In [14]:
k = list(aamws.keys())
k.sort()
print(k)
aamws.keys().sort()

['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']


AttributeError: 'dict_keys' object has no attribute 'sort'

In [11]:
type(aamws.keys())

dict_keys

They are not standards lists, they are dict_keys which is an object/view of the dict that allows for iteration, multiple iteration, and set operations.

# Dict objects #

Even though you can't use standard list methods on these objects, you can use some of the built-in functions that we know and love.

In the cell below, try the following:

sorted() on dict.keys()
reversed(sorted()) on dict.keys()
sum() on dict.values()
len() on dict.items()
min() on dict.values()
max() on dict.values()
set() on dict.values()

In [13]:
print(sorted(aamws.keys()))
print(reversed(sorted(aamws.keys())))
print(sum(aamws.values()))
print(len(aamws.items()))
print(min(aamws.values()))
print(set(aamws.values()))

['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
<list_reverseiterator object at 0x7f9a329dddf0>
2738.0169
20
75.0669
{131.1736, 132.1184, 133.1032, 146.1882, 147.1299, 146.1451, 149.2124, 155.1552, 165.19, 174.2017, 181.1894, 75.0669, 204.2262, 89.0935, 105.093, 115.131, 117.1469, 119.1197, 121.159}


# Change dict objects to lists #

Using a function we've seen previously, how would you change a dict keys or dict values object to a regular list, if you needed to? Try it out in the cell below.

In [15]:
list(aamws.keys())

['I',
 'L',
 'K',
 'M',
 'F',
 'T',
 'W',
 'V',
 'R',
 'H',
 'A',
 'N',
 'D',
 'C',
 'E',
 'Q',
 'G',
 'P',
 'S',
 'Y']

# Getting a value when you have a key #

Last time we saw how to use .get() to get the value associated with a key. Use .get() to get the value associated with the amino acid cysteine (C) in the cell below.

In [16]:
aamws.get("C")

121.159

# Getting a key when you have a value #

We can also get a key when we know a value. This can be fraught with danger because sometimes we have values that are non-unique. But if we do something like, say, getting the min() or max() of a list of values, then we can go back and get the corresponding key by iterating over the items.

# Iterating over .items() #

.items() returns a tuple-like object, so you need to iterate over it with a statement that has a variable for each value in the tuple:

```for aa,mw in aamws.items():
    if mw == the_value_i_want:
        the_answer = aa```

In [18]:
for aa,mw in aamws.items():
    if mw == min(aamws.values()):
        min_key = aa
print(min_key)

G


# Iterating over .keys() #

To iterate over only the keys in your dictionary and get the associated values, you would use a loop like:

```for aa in aamws.keys():
    print(aamws[aa])```

In [25]:
for aa in aamws.keys():
    print(aa, aamws[aa])

I 131.1736
L 131.1736
K 146.1882
M 149.2124
F 165.19
T 119.1197
W 204.2262
V 117.1469
R 174.2017
H 155.1552
A 89.0935
N 132.1184
D 133.1032
C 121.159
E 147.1299
Q 146.1451
G 75.0669
P 115.131
S 105.093
Y 181.1894


# Sorting .keys() #

You can use sorted() and reversed() with dict object types. In the cell below, access your keys in sorted() and reversed(sorted()) order to print out the amino acid molecular weight list in alphabetical and reverse alphabetical order.

In [24]:
for aa in reversed(sorted(aamws.keys())):
    print(aa,":",aamws[aa])

Y : 181.1894
W : 204.2262
V : 117.1469
T : 119.1197
S : 105.093
R : 174.2017
Q : 146.1451
P : 115.131
N : 132.1184
M : 149.2124
L : 131.1736
K : 146.1882
I : 131.1736
H : 155.1552
G : 75.0669
F : 165.19
E : 147.1299
D : 133.1032
C : 121.159
A : 89.0935


# Iterating over .values() #

You can, but should you? Let's try.

```for mw in aamws.values():```

Starting from this point, and with what we learned above, would you have to do to go back and get the keys for each value in the list?

Would it be a good idea to get set(dict.values()) and iterate over that instead? Why? 

In [27]:
for mw in aamws.values():
    print(mw)
    print( [aa for aa,mw2 in aamws.items() if mw2 == mw] )
#not a good idea to get the set because the values are not unique, so you might lose some keys

131.1736
['I', 'L']
131.1736
['I', 'L']
146.1882
['K']
149.2124
['M']
165.19
['F']
119.1197
['T']
204.2262
['W']
117.1469
['V']
174.2017
['R']
155.1552
['H']
89.0935
['A']
132.1184
['N']
133.1032
['D']
121.159
['C']
147.1299
['E']
146.1451
['Q']
75.0669
['G']
115.131
['P']
105.093
['S']
181.1894
['Y']


#not a good idea to get the set because the values are not unique, so you might lose some keys

In [3]:
def gc_content(sequence):
    count = sequence.count("G") + sequence.count("C")
    prob = count/len(sequence)
    return prob

assert gc_content("AAATTT") == 0.0
assert gc_content("GGGCCC") == 1.0
assert gc_content("AAAGGG") == .50
assert gc_content("GGCCAA") == 2/3


In [5]:
import random
def powerball():
    winning_numbers = []
    for i in range(0,6):
        winning_numbers.append(random.randint(1,40))
    winning_numbers.append(random.randint(1,15))
    return winning_numbers

pb = powerball()
print(pb)

[9, 5, 14, 12, 21, 18, 1]


In [7]:
abc = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
for i in range(0,len(abc),3):
    print(abc[i:i+3])

ABC
DEF
GHI
JKL
MNO
PQR
STU
VWX
YZ


In [8]:
nums = [1,2,5,7,10,13,22,24,35,54,81,94]
for i in nums:
    print(i,i%3==0)

1 False
2 False
5 False
7 False
10 False
13 False
22 False
24 True
35 False
54 True
81 True
94 False


In [11]:
codons = ["AAA","AAA","BBB","CCC"]
unique = set(codons)
print(unique)
for i in unique:
    print(i,codons.count(i))

{'CCC', 'BBB', 'AAA'}
CCC 1
BBB 1
AAA 2


In [12]:
with open("aamw.txt") as fo:
    molweights = []
    for line in fo.readlines():
        if line[0] != "#":
            molweights.append(line.split()[1])
print(molweights)

['131.1736', '131.1736', '146.1882', '149.2124', '165.1900', '119.1197', '204.2262', '117.1469', '174.2017', '155.1552', '89.0935', '132.1184', '133.1032', '121.1590', '147.1299', '146.1451', '75.0669', '115.1310', '105.0930', '181.1894']


In [14]:
import random
colors = ["red", "yellow", "blue", "white", "black"]
random = random.choice(colors)
print(random)

black


In [15]:
colors = ["red", "yellow", "blue", "white", "black"]
my_string = ":".join(colors)
print(my_string)

red:yellow:blue:white:black


In [18]:
seq = "AAABBBBBBBBBBBBHGHGHGHGHGHGHGHGHHGHGHG"
barcode = "HG"
print(seq[:10].find(barcode) != -1)

False


In [22]:
seq = "0123456789"
i=5
print(seq[i-2:i+3])

34567


In [24]:
def getCodons():
    codonlist = []
    for i in ["A","T","G","C"]:
        for j in ["A","T","G","C"]:
            for k in ["A","T","G","C"]:
                codonlist.append(i+j+k)
    return codonlist
print(getCodons())

['AAA', 'AAT', 'AAG', 'AAC', 'ATA', 'ATT', 'ATG', 'ATC', 'AGA', 'AGT', 'AGG', 'AGC', 'ACA', 'ACT', 'ACG', 'ACC', 'TAA', 'TAT', 'TAG', 'TAC', 'TTA', 'TTT', 'TTG', 'TTC', 'TGA', 'TGT', 'TGG', 'TGC', 'TCA', 'TCT', 'TCG', 'TCC', 'GAA', 'GAT', 'GAG', 'GAC', 'GTA', 'GTT', 'GTG', 'GTC', 'GGA', 'GGT', 'GGG', 'GGC', 'GCA', 'GCT', 'GCG', 'GCC', 'CAA', 'CAT', 'CAG', 'CAC', 'CTA', 'CTT', 'CTG', 'CTC', 'CGA', 'CGT', 'CGG', 'CGC', 'CCA', 'CCT', 'CCG', 'CCC']
