# Functions

In [7]:
import random

def some_func():
    """Print one nucleotide letter chosen at random from A, C, G and T."""
    nucleotides = ['A', 'C', 'G', 'T']
    picked = random.choice(nucleotides)
    print(picked)
    
for _ in range(5):
    some_func()

G
C
A
C
A


In [8]:
def f(a):
    """Return ``a`` raised to the power of two."""
    squared = pow(a, 2)
    return squared
    
b = f(5.5)
print(b)

30.25


In [9]:
def func(arg1, arg2):
    """Return the weighted mean of the arguments, with ``arg2`` counted twice.

    Computes ``(arg1 + 2 * arg2) / 3`` using true division.
    """
    weighted_total = arg1 + 2 * arg2
    return weighted_total / 3
    
print(func(3, 6))
print(func(13, 13))

5.0
13.0


In [10]:
# Providing argument values by position vs. by keyword
print(func(1, arg2 = 2))
print(func(arg2 = 2, arg1 = 1))

1.6666666666666667
1.6666666666666667


In [11]:
print(func(arg1 = 13, 13))

SyntaxError: positional argument follows keyword argument (<ipython-input-11-0d9bfbe785b6>, line 1)

In [12]:
# Default value

def duplicate(string, number = 2):
    """Return ``string`` repeated ``number`` times (twice when omitted)."""
    repeated = string * number
    return repeated
    
print(duplicate('foo'))
print(duplicate('foo', 2))
print(duplicate('foo', number = 2))
print(duplicate('foo', 5))

foofoo
foofoo
foofoo
foofoofoofoofoo


In [13]:
# Deliberately invalid: a parameter without a default (c) may not follow one
# that has a default (b), so this definition is rejected with a SyntaxError.
def g(a, b = 2, c):
    return a + b + c

SyntaxError: non-default argument follows default argument (<ipython-input-13-0aa4fd032ef8>, line 1)

In [14]:
# It's better to explicitly return None, but this would work as well...

def calc(x, y):
    """Return ``x - y`` when ``x`` is greater than ``y``.

    In every other case no value is given back, so the caller receives
    ``None``.
    """
    if not (x > y):
        return
    return x - y
        
print(calc(5, 3))
print(calc(3, 5))

2
None


In [15]:
# Like everything else, functions are objects
f = calc
print(f(5, 3))
print(type(f))

2
<class 'function'>


In [16]:
# Functions can also be arguments of other functions

def convert_seq(seq, convert_letter_function):
    """Convert a sequence letter by letter and return the result as one string.

    ``convert_letter_function`` is called once for each item of ``seq``, in
    order, and the strings it returns are joined without a separator.
    """
    return ''.join(map(convert_letter_function, seq))
    
def dna_nt_to_rna_nt(nt):
    """Return 'U' for the nucleotide 'T'; return any other value unchanged."""
    return 'U' if nt == 'T' else nt
    
def rna_nt_to_dna_nt(nt):
    """Return 'T' for the nucleotide 'U'; return any other value unchanged."""
    return 'T' if nt == 'U' else nt
    
print(convert_seq('ATTCGA', dna_nt_to_rna_nt))
print(convert_seq('UUUAGU', rna_nt_to_dna_nt))

AUUCGA
TTTAGT


In [17]:
# Can have functions within functions

def dna_to_rna(dna_seq):
    """Return the RNA version of ``dna_seq``: every 'T' becomes 'U'.

    All other letters are copied through unchanged. An empty input gives an
    empty string.
    """

    def convert_nt(nt):
        """Map a single DNA nucleotide to its RNA counterpart."""
        return 'U' if nt == 'T' else nt

    # Join once instead of growing the string with '+=' on every iteration;
    # this matches how convert_seq assembles its result.
    return ''.join(convert_nt(nt) for nt in dna_seq)
    
print(dna_to_rna('AAAGAGAGAATGTTGC'))

AAAGAGAGAAUGUUGC


In [18]:
# Global vs. local variables

x = 5

def f():
    """Return 4, computed from a local ``x``.

    The assignment below creates a name that is local to this function, so
    a module-level ``x`` is neither read nor modified.
    """
    x = 3
    result = x + 1
    return result
    
print(f())
print(x)

4
5


SyntaxError: invalid syntax (<ipython-input-26-270dc1698367>, line 2)

In [28]:
def f2():
    """Add the module-level ``var2`` to the module-level ``var1``.

    Nothing is returned; the result is stored back in ``var1``.
    """
    # 'global' makes the augmented assignment below rebind the module-level
    # var1 instead of creating a local variable.
    # NOTE(review): var1 and var2 are not assigned in any cell shown here;
    # they must already exist when f2() is called - verify.
    global var1, var2
    var1 += var2
    
f2()
print(var1)

17


In [31]:
# 'global' is required only when assigning to (rebinding) a global name, not when mutating the object it refers to

x = []

def f1():
    """Append 5 to the list that the global name ``x`` refers to."""
    # No 'global' statement is needed: x is only looked up and the list is
    # mutated in place; the name itself is never assigned.
    x.append(5)
    
def f2():
    """Extend the list bound to the global name ``x`` with the element 6."""
    # An augmented assignment assigns to the name x, so without 'global'
    # Python would treat x as local and fail with UnboundLocalError.
    # NOTE(review): this reuses the name f2 from an earlier cell and replaces
    # that definition.
    global x
    x += [6]
    
print(x)
f1()
print(x)
f2()
print(x)

[]
[5]
[5, 6]


# Built-in functions

In [32]:
print(sum(range(5)))
print(max([1, 5, 6, 2]))
print(min(1, 5, 6, 2))
print(abs(-5))

10
6
1
5


In [34]:
s = 'string'
dir(s)


TypeError: count() takes at least 1 argument (0 given)

In [35]:
dir()

['In',
 'Out',
 '_',
 '_33',
 '__',
 '___',
 '__builtin__',
 '__builtins__',
 '__doc__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_dh',
 '_i',
 '_i1',
 '_i10',
 '_i11',
 '_i12',
 '_i13',
 '_i14',
 '_i15',
 '_i16',
 '_i17',
 '_i18',
 '_i19',
 '_i2',
 '_i20',
 '_i21',
 '_i22',
 '_i23',
 '_i24',
 '_i25',
 '_i26',
 '_i27',
 '_i28',
 '_i29',
 '_i3',
 '_i30',
 '_i31',
 '_i32',
 '_i33',
 '_i34',
 '_i35',
 '_i4',
 '_i5',
 '_i6',
 '_i7',
 '_i8',
 '_i9',
 '_ih',
 '_ii',
 '_iii',
 '_oh',
 'b',
 'calc',
 'convert_seq',
 'dna_nt_to_rna_nt',
 'dna_to_rna',
 'duplicate',
 'exit',
 'f',
 'f1',
 'f2',
 'func',
 'get_ipython',
 'quit',
 'random',
 'rna_nt_to_dna_nt',
 's',
 'some_func',
 'var1',
 'var2',
 'x']

In [36]:
dir(__builtin__)

['ArithmeticError',
 'AssertionError',
 'AttributeError',
 'BaseException',
 'BlockingIOError',
 'BrokenPipeError',
 'BufferError',
 'ChildProcessError',
 'ConnectionAbortedError',
 'ConnectionError',
 'ConnectionRefusedError',
 'ConnectionResetError',
 'EOFError',
 'Ellipsis',
 'EnvironmentError',
 'Exception',
 'False',
 'FileExistsError',
 'FileNotFoundError',
 'FloatingPointError',
 'GeneratorExit',
 'IOError',
 'ImportError',
 'IndentationError',
 'IndexError',
 'InterruptedError',
 'IsADirectoryError',
 'KeyError',
 'KeyboardInterrupt',
 'LookupError',
 'MemoryError',
 'ModuleNotFoundError',
 'NameError',
 'None',
 'NotADirectoryError',
 'NotImplemented',
 'NotImplementedError',
 'OSError',
 'OverflowError',
 'PermissionError',
 'ProcessLookupError',
 'RecursionError',
 'ReferenceError',
 'RuntimeError',
 'StopAsyncIteration',
 'StopIteration',
 'SyntaxError',
 'SystemError',
 'SystemExit',
 'TabError',
 'TimeoutError',
 'True',
 'TypeError',
 'UnboundLocalError',
 'UnicodeDecode

In [37]:
for s in dir(__builtin__):
    if 'set' in s.lower():
        print(s)

ConnectionResetError
frozenset
set
setattr


In [38]:
help(reversed)

Help on class reversed in module builtins:

class reversed(object)
 |  reversed(sequence, /)
 |  
 |  Return a reverse iterator over the values of the given sequence.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __length_hint__(...)
 |      Private method returning an estimate of len(list(it)).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  __setstate__(...)
 |      Set state information for unpickling.
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.



In [39]:
sum?

In [40]:
str.lower?

# Modules

In [6]:
import random
print(random.random())

0.5325282430398771


In [7]:
rrr1 = random
print(rrr1.random())

import random as rrr2
print(rrr2.random())

0.863205634776821
0.5357780093710732


In [8]:
from random import randint
print(randint(0, 10))

from random import randint, choice as c
print(c('acde'))

from random import * # Not recommended!
print(random())

3
a
0.6004850898366747


In [9]:
import sys
print(sys.version)
print('*' * 20)
print(sys.path)

3.7.6 (default, Jan  8 2020, 20:23:39) [MSC v.1916 64 bit (AMD64)]
********************
['C:\\Users\\yuuki\\Downloads', 'C:\\Users\\yuuki\\anaconda3\\python37.zip', 'C:\\Users\\yuuki\\anaconda3\\DLLs', 'C:\\Users\\yuuki\\anaconda3\\lib', 'C:\\Users\\yuuki\\anaconda3', '', 'C:\\Users\\yuuki\\anaconda3\\lib\\site-packages', 'C:\\Users\\yuuki\\anaconda3\\lib\\site-packages\\win32', 'C:\\Users\\yuuki\\anaconda3\\lib\\site-packages\\win32\\lib', 'C:\\Users\\yuuki\\anaconda3\\lib\\site-packages\\Pythonwin', 'C:\\Users\\yuuki\\anaconda3\\lib\\site-packages\\IPython\\extensions', 'C:\\Users\\yuuki\\.ipython']


In [10]:
import math
print(math.exp(2))
print(math.e ** 2)
print(math.pi)
print(math.sin(math.pi / 6))
print(math.log(100))
print(math.log10(100))
print(math.log2(100))
print(math.log(81) / math.log(3))

7.38905609893065
7.3890560989306495
3.141592653589793
0.49999999999999994
4.605170185988092
2.0
6.643856189774724
4.0


# Useful bound functions

In [11]:
string = 'The sea was wet as wet could be, the sands were dry as dry.'

print(string.upper())
print(string.lower())

THE SEA WAS WET AS WET COULD BE, THE SANDS WERE DRY AS DRY.
the sea was wet as wet could be, the sands were dry as dry.


In [12]:
print(string.find('the'))
print(string.index('the'))

print(string.find('seas'))
print(string.index('seas'))

33
33
-1


ValueError: substring not found

In [13]:
print(string.startswith('The sea'))
print(string.startswith('The seas'))
print(string.endswith('dry'))

True
False
False


In [14]:
print(string.replace('as', 'more than'))
print(string.replace(' as ', ' more than '))

The sea wmore than wet more than wet could be, the sands were dry more than dry.
The sea was wet more than wet could be, the sands were dry more than dry.


In [15]:
print(string.split(' '))
print(string.split())
print(string.split(' as '))

['The', 'sea', 'was', 'wet', 'as', 'wet', 'could', 'be,', 'the', 'sands', 'were', 'dry', 'as', 'dry.']
['The', 'sea', 'was', 'wet', 'as', 'wet', 'could', 'be,', 'the', 'sands', 'were', 'dry', 'as', 'dry.']
['The sea was wet', 'wet could be, the sands were dry', 'dry.']


In [None]:
long_string = \
'''The sea was wet as wet could be,
The sands were dry as dry.
You could not see a cloud, because
No cloud was in the sky:
No birds were flying overhead--
There were no birds to fly.'''

print(long_string.splitlines())
print(long_string.split('\n'))

In [None]:
print('123456'.isdigit()) # True
print('A123456'.isdigit()) # False
print('123 456'.isdigit()) # False

print('AaBbCc'.isalpha()) # True
print('A123456'.isalpha()) # False
print('A '.isalpha()) # False

print('123 abc !'.islower()) # True
print('123 abc A'.islower()) # False

print('123 ABC !'.isupper()) # True
print('123 ABC a'.isupper()) # False

print(' \t\r\n'.isspace()) # True
print(' \t\r\n1'.isspace()) # False

In [16]:
print('   GAAGACT   \t\r\n'.strip())
print('NNGATGCGNNAGATGGGTNNNN'.strip('N'))

GAAGACT
GATGCGNNAGATGGGT


In [None]:
codons = ['ATG', 'TGT', 'AGG', 'GAA', 'TGT', 'ATG', 'ATC', 'TAG']
print(codons)

print(codons.count('TGT'))

print(codons.pop())
print(codons)

print(codons.pop(1))
print(codons)

In [None]:
codons = ['ATG', 'TGT', 'AGG', 'GAA', 'TGT', 'ATG', 'ATC', 'TAG']
aa_seq = 'MCRECMI*'

codon_and_aa = list(zip(codons, aa_seq))
print(codon_and_aa)

codon_table = dict(codon_and_aa)
print(codon_table)

In [None]:
print(list(zip(codons, aa_seq[:-1])))
print(list(zip(range(5), ['zero', 'one', 'two', 'three', 'four'], [None, None, True, True, False])))

In [None]:
print(list(enumerate(codons)))

In [None]:
for i, codon in enumerate(codons):
    print('Codon %d: %s' % (i, codon))

In [None]:
for i, codon_and_aa in enumerate(zip(codons, aa_seq)):
    codon, aa = codon_and_aa
    print('Codon %d: %s (%s)' % (i, codon, aa))

In [None]:
a, (b, c) = [1, [2, 3]]
print('a = %d, b = %d, c = %d' % (a, b, c))

In [None]:
for i, (codon, aa) in enumerate(zip(codons, aa_seq)):
    print('Codon %d: %s (%s)' % (i, codon, aa))

# Sorting

In [None]:
values = [1.0, 3.0, -1.2, 5.61, 2.11, 0.002, -2]
print(sorted(values))
print(sorted(values, reverse = True))

In [None]:
print(values)
values.sort()
print(values)

In [None]:
print(sorted(values, key = abs))
print(sorted(values, key = abs, reverse = True))

In [None]:
tuples = [(1, 2), (-1, 3), (0, 2), (5, 0), (3, 4), (0, -1)]
print(sorted(tuples))

In [None]:
def get_value(pair):
    """Return the item at index 1 of ``pair``; used as a sort key."""
    second = pair[1]
    return second
    
print(sorted(tuples, key = get_value))

In [None]:
animals = ['dog', 'cat', 'mouse', 'elephant']
print(sorted(animals))
print(sorted(animals, key = len))

# Files

In [None]:
# Open the file in text read mode. The handle is deliberately kept open: the
# following cells read from it again and then close it explicitly.
# NOTE(review): hard-coded absolute path - adjust to where BRCA1.txt lives.
f = open(r'c://downloads/BRCA1.txt', 'r')
print(f)
print('*' * 20)

# read() without an argument returns everything up to the end of the file
content = f.read()
print(content)

In [None]:
print(f.read())

In [None]:
# Never forget to close a file!
f.close()

In [None]:
# Parsing is a common programming routine

# Each non-blank line of `content` is expected to look like 'key<TAB>value'.
# Lines whose key is 'exon' carry coordinates written as 'start..end'; every
# other key/value pair is stored as free-form metadata.
meta = {}
exons = []

for line in content.splitlines():

    # Skip blank lines (e.g. a trailing empty line) instead of failing on them
    if not line.strip():
        continue

    # Split on the first tab only, so a value may itself contain tabs
    key, value = line.split('\t', 1)

    if key == 'exon':
        start, end = value.split('..')
        exons.append((int(start), int(end)))
    else:
        meta[key] = value


# Print the parsed data to see we got it right

for key, value in meta.items():
    print('%s: %s' % (key, value))

print('*' * 20)
print('Exons:')
print(exons)

In [None]:
# Length of every exon, in the same order as `exons`
exon_lengths = []

for start, end in exons:
    # + 1 counts both end points of the start..end range
    exon_lengths.append(end - start + 1)

# Summary statistics over all exons
print('Exons: %d' % len(exons))
print('Min exon length: %d' % min(exon_lengths))
print('Max exon length: %d' % max(exon_lengths))
print('Average exon length: %.2f' % (sum(exon_lengths) / len(exons)))

In [None]:
# The 'with' block closes the file even if write() raises, so the handle can
# never be left open by accident.
with open(r'c://downloads/exon_lengths.txt', 'w') as f:
    # str() of a list looks like '[a, b, c]'; slicing off the first and last
    # character drops the brackets and leaves 'a, b, c'.
    f.write(str(exon_lengths)[1:-1])

# More useful modules

In [None]:
import os

print(os.path.join(r'C://temp/dir/', 'something/file.txt'))
print(os.listdir(r'C://'))

In [None]:
os.system('calc')

In [None]:
from collections import Counter, defaultdict

In [None]:
aa_seq = 'MQAEQTRCAAARGSAEMESLWHAAPGDEEIPLHPPPTPGAMSLESDSSLDTLAEKIECDLMDLLGDMGPPCDIDEEEDQLFAEALPPLYS'
aa_count = Counter()

print(aa_count)

for aa in aa_seq:
    aa_count[aa] += 1
    
print(aa_count)

In [None]:
print(Counter(aa_seq))

In [None]:
aa_count['A'] += 1
print(aa_count)

aa_count['A'] += 3
print(aa_count)

In [None]:
print(aa_count['W'])
print(aa_count['*']) # Doesn't exist in the counter.

In [None]:
for aa, count in aa_count.most_common()[:5]:
    print('%s: %d' % (aa, count))

In [None]:
aa_count = Counter(aa_seq)
print(aa_count)
aa_count.update('AALLL')
print(aa_count)

In [None]:
print(dict(aa_count))

In [None]:
print(Counter('the sea was wet as wet could be the sands were dry as dry'.split()))

In [None]:
aa_positions = defaultdict(list)

for i, aa in enumerate(aa_seq):
    aa_positions[aa].append(i)
    
print(aa_positions)
print('*' * 20)
print(aa_positions['Z'])

In [None]:
print(dict(aa_positions))

In [None]:
def create_empty_counter():
    """Return a new, empty ``Counter``.

    Passed to ``defaultdict`` as the factory that supplies the value for a
    key that is not present yet.
    """
    fresh_counter = Counter()
    return fresh_counter

next_aa_counter = defaultdict(create_empty_counter)

for i, aa in enumerate(aa_seq[:-1]):
    next_aa = aa_seq[i + 1]
    next_aa_counter[aa][next_aa] += 1

print(next_aa_counter)
print('*' * 20)
print(next_aa_counter['A'])
print(next_aa_counter['A']['E'])
print(next_aa_counter['Z']['E'])
print('*' * 20)
print(next_aa_counter)