In [1]:
import numpy as np

Reference code from Mike Harms

```
def load_pdb(pdb_file):
    
    f = open(pdb_file,'r')
    lines = f.readlines()
    f.close()
    
    all_coord = []
    for l in lines:
        if l[0:4] == "ATOM" and l[13:16] == "CA ":
            coord = [float(l[(30 + i*8):(38 + i*8)]) for i in range(3)]
            all_coord.append(coord)
            
    return np.array(all_coord)
```

```
import gzip

get_line = False
seqs = {}
with gzip.open("files/example.fastq.gz") as f:
    for l in f:
        l_ascii = l.decode("ascii")
        if l_ascii[0] == "@":
            get_line = True
            continue
        if get_line:
            try:
                seqs[l_ascii.strip()] += 1
            except KeyError:
                seqs[l_ascii.strip()] = 1
            get_line = False
```

In [5]:
file = "200327_p0.01.txt"

def RNAseq_tsv(file):
    """
    Takes RNA-seq data from Devin Dinwiddie, 
    reads it in 
        (while removing the first header row),
    and appends lines to lists (up, dn, wut) for genes that are upregulated, downregulated, or inconclusively affected 
    as compared to wildtype (expression levels in column 9, indexed in the 'for' loop as l[9])
    
    RETURNS
    three lists (up, dn, wut) followed by a string description of the lists.
    
    """
    f = open("200327_p0.01.txt", 'r')
    lines = f.readlines()
    f.close()
    
    lines.remove(lines[0])
    
    up = []
    dn = []
    wut = []
    for l in lines:
        if l[10] > l[9] and l[11] > l[9]:
            up.append(l[0:-2])
        elif l[10] < l[9] and l[11] < l[9]:
            dn.append(l[0:-2])
        else:
            wut.append(l[0:-2])
            
    return(up, dn, wut, str(len(lines)) + " genes:" + "\n" + str(len(up)) + " genes upregulated" + "\n" + str(len(dn)) + " genes downregulated" + "\n" + str(len(wut)) + " genes inconclusived.")

In [6]:
# TEST
RNAseq_tsv(file)

(['2\tYBR232C\t9.8483063\t6.334332588\t2.4480511\t2.587500132\t0.009667516\t0.301887884\t0\t17.9950871\t11.54983',
  '3\tYNL105W\t10.543813\t6.422243308\t2.4271361\t2.646016951\t0.008144574\t0.281729364\t0\t11.9967248\t19.63471',
  '4\tYDR193W\t19.4482903\t7.307859168\t2.2737503\t3.214011281\t0.001308945\t0.085531366\t0\t24.8503584\t33.49451',
  '5\tYOR248W\t17.7469105\t7.171072236\t2.310109\t3.104213766\t0.001907854\t0.105836574\t0\t16.2812693\t36.95946',
  '6\tYAR068W\t26.7507459\t7.765312116\t2.2337081\t3.476422061\t0.000508152\t0.047390688\t0\t28.2779941\t51.97424',
  '8\tYFL012W\t22.1407693\t5.036850664\t1.7232533\t2.922873096\t0.003468178\t0.158661071\t0.9987193\t37.7039921\t27.71959',
  '9\tYKL147C\t21.4950146\t4.988993644\t1.7254802\t2.891365411\t0.003835718\t0.171408669\t0.9987193\t29.9918119\t33.49451',
  '66\tQ0085\t135.033471\t3.704931902\t0.6778497\t5.465713315\t4.61E-08\t4.24E-05\t14.9807892\t236.5068595\t153.61276',
  '168\tsnR13\t790.1830288\t1.378605056\t0.5242557\t2.6

In [None]:
print("holy shit that's the first chunk of code that's worked in months")

In [None]:
#TEST FOR NEXT CHUNK:
print(up)

['7\tYNL277W-A\t20.1413152\t4.896327771\t1.7396117\t2.814609522\t0.004883653\t0.200154697\t0.9987193\t31.7056297\t27.71959', '32\tsnR78\t35.0570591\t3.045106167\t0.9934903\t3.065058915\t0.002176271\t0.116702545\t5.9923157\t40.2747188\t58.90414', '99\tsnR47\t241.5810331\t3.345661112\t0.5551482\t6.02660882\t1.67E-09\t1.80E-06\t33.9564556\t308.487208\t382.29943', '100\tYBR299W\t95.9894995\t1.858483346\t0.6520574\t2.850183719\t0.004369398\t0.186205813\t34.9551749\t152.5297862\t100.48353', '101\tYPR001W\t101.6480929\t1.855366238\t0.6487067\t2.860100161\t0.004235072\t0.1816846\t36.9526134\t102.8290693\t165.16259', '102\tYIR028W\t117.8155215\t2.051220805\t0.6976651\t2.940122459\t0.003280826\t0.15523612\t37.9513327\t89.1185268\t226.37670', '103\tYNL117W\t135.1746644\t2.23362546\t0.5885055\t3.795419814\t0.000147394\t0.01935672\t38.950052\t171.3817822\t195.19215', '104\tYOR378W\t130.5797533\t2.176652445\t0.6162336\t3.532187027\t0.000412138\t0.041439164\t38.950052\t137.9623347\t214.82687', '105\t

In [11]:
# add return function to get those lists out of the function too.
outputmebbe = RNAseq_tsv(file)
up = outputmebbe[0]
dn = outputmebbe[1]
wut = outputmebbe[2]
message = outputmebbe[3]

In [15]:
#https://docs.scipy.org/doc/numpy/reference/generated/numpy.fromstring.html


#want to iterate items in a list to rows in an array
for i in wut:
    wut[i] = np.fromstring(wut,sep='\t')
## then separate rows into separate items delimited by tabs
print(wut)

  


IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices