# Reading Files

In [None]:
def head(file_name):
    """
    Prints the FIRST 10 lines of a file (or the whole file if it is shorter),
    followed by the extra newline that print() appends
    :param file_name: a string, path of the text file to read
    :return: None
    """
    # The context manager closes the file even if reading fails.
    with open(file_name, "r") as f:
        # zip() stops after 10 items, so only the needed lines are read
        # instead of loading the entire file into memory.
        first_lines = [line for _, line in zip(range(10), f)]
    print("".join(first_lines))
    return

def tail(file_name):
    """
    Prints the LAST 10 lines of a file (or the whole file if it is shorter),
    followed by the extra newline that print() appends
    :param file_name: a string, path of the text file to read
    :return: None
    """
    from collections import deque

    # The context manager closes the file even if reading fails.
    with open(file_name, "r") as f:
        # A bounded deque keeps only the 10 most recent lines while the file
        # is streamed, so memory use does not grow with the file size.
        last_lines = deque(f, maxlen=10)
    print("".join(last_lines))
    return

def print_even(file_name):
    """
    Prints the even numbered lines of a file (the 2nd, 4th, 6th, ... line,
    counting from 1), followed by the extra newline that print() appends
    :param file_name: a string, path of the text file to read
    :return: None
    """
    # The context manager closes the file even if reading fails.
    with open(file_name, "r") as f:
        # Line numbers start at 1 so that "even" matches how a person
        # would count the lines in the file.
        even_lines = [line for number, line in enumerate(f, start=1)
                      if number % 2 == 0]
    print("".join(even_lines))
    return

def csv_list(file_name):
    """
    Read in a CSV file to a 2D array (In python it is a list of lists).
    Each inner list holds the cells of one row as strings. Quoted fields
    (including ones that contain commas) are handled by the csv module, and
    the line terminator is not included in the last cell. A blank line in
    the file produces an empty list.
    :param file_name: a string, path of the CSV file to read
    :return: a list of lists
    """
    import csv

    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields that span several lines.
    with open(file_name, newline='') as myfile:
        return list(csv.reader(myfile))

def get_csv_column(file_name, column):
    """
    Reads in a CSV file and returns a list of values belonging to the column specified.
    Rows that are too short to contain the column are reported on stdout and
    skipped; completely blank lines are skipped silently.
    :param file_name: a string, path of the CSV file to read
    :param column: a positive integer (1 is the first column)
    :return: a list of strings, one per row that has the requested column
    :raises ValueError: if column is smaller than 1
    """
    import csv

    # Without this check, 0 or a negative number would silently index from
    # the end of each row and return the wrong column.
    if column < 1:
        raise ValueError('column must be a positive integer, got {}'.format(column))

    values = []
    # newline='' lets the csv module do its own newline handling.
    with open(file_name, newline='') as myfile:
        for row in csv.reader(myfile):
            if not row:
                # Blank line: there is no row to report on.
                continue
            if len(row) < column:
                # Report the column the caller actually asked for (1-based).
                print('Column {} does not exist in row {}'.format(column, row))
            else:
                values.append(row[column - 1])
    return values

def fasta_seqs(file_name):
    """
    Reads in a FASTA file and returns a list of only the sequences.
    A record starts at a line beginning with ">" and its sequence is every
    following line, joined without newlines, up to the next header line.
    Lines that appear before the first header are ignored. A header with no
    sequence lines yields an empty string.
    :param file_name: a string, path of the FASTA file to read
    :return: a list of strings, in file order
    """
    seq_list = []
    # None means "no header seen yet"; a list collects the current record.
    current = None
    with open(file_name, 'r') as infile:
        for line in infile:
            line = line.rstrip('\n')
            # Only a ">" at the start of a line begins a record, so a ">"
            # inside a header description does not split the record.
            if line.startswith('>'):
                if current is not None:
                    seq_list.append(''.join(current))
                current = []
            elif current is not None:
                current.append(line)
    # Flush the final record, which has no following header to trigger it.
    if current is not None:
        seq_list.append(''.join(current))
    return seq_list

def fasta_headers(file_name):
    """
    Reads in a FASTA file and returns a list of only the headers (Lines that start with ">").
    The leading ">" and the trailing newline are removed from each header.
    :param file_name: a string, path of the FASTA file to read
    :return: a list of strings, in file order
    """
    with open(file_name, 'r') as infile:
        # Only a ">" at the start of a line marks a header, so a ">" inside
        # a header description is kept as part of that header.
        return [line[1:].rstrip('\n') for line in infile if line.startswith('>')]


def fasta_dict(file_name):
    """
    Reads in a FASTA file and returns a dictionary of the format {header: sequence, ...}, where
    the sequence headers are keys and the sequence is the value.
    A header is a line beginning with ">" (stored without the ">"); its
    sequence is every following line, joined without newlines, up to the
    next header. Lines before the first header are ignored. If a header
    occurs more than once, the last occurrence wins.
    :param file_name: a string, path of the FASTA file to read
    :return: a dictionary mapping header strings to sequence strings
    """
    my_dict = {}
    # header is None until the first ">" line has been seen.
    header = None
    parts = []
    with open(file_name, 'r') as infile:
        for line in infile:
            line = line.rstrip('\n')
            # Only a ">" at the start of a line begins a record, so a ">"
            # inside a header description does not split the record.
            if line.startswith('>'):
                if header is not None:
                    my_dict[header] = ''.join(parts)
                header = line[1:]
                parts = []
            elif header is not None:
                parts.append(line)
    # Store the final record, which has no following header to trigger it.
    if header is not None:
        my_dict[header] = ''.join(parts)
    return my_dict

def fastq_to_fasta(file_name, new_name=None):
    """
    Reads in a FASTQ file and writes it to a new FASTA file. If new_name is not
    specified, the output keeps the same file name with the extension changed
    (e.g. from .fastq to .fasta).
    EX: fastq_to_fasta('ecoli.fastq') should write to a new file called ecoli.fasta

    Each FASTQ record ("@header", sequence line(s), "+" separator, quality
    line(s)) is written as ">header" followed by the sequence on one line.
    Quality data is discarded.
    :param file_name: a string, path of the FASTQ file to read
    :param new_name: a string, path of the FASTA file to write (optional)
    :return: None
    :raises ValueError: if the output path is the same as the input path
    """
    import os

    if new_name is None:
        # splitext only strips the final extension, so dots elsewhere in the
        # path (e.g. "./data/my.sample.fastq") are preserved.
        new_name = os.path.splitext(file_name)[0] + '.fasta'
    if os.path.abspath(new_name) == os.path.abspath(file_name):
        # Opening the output would truncate the input before it is read.
        raise ValueError('output file would overwrite the input file: {}'.format(file_name))

    with open(file_name, 'r') as infile, open(new_name, 'w') as output:
        lines = (raw.rstrip('\n') for raw in infile)
        for line in lines:
            if not line.startswith('@'):
                # Blank line or stray text between records.
                continue
            header = line[1:]

            # Sequence lines run until the "+" separator line.
            seq_parts = []
            for line in lines:
                if line.startswith('+'):
                    break
                seq_parts.append(line)
            sequence = ''.join(seq_parts)

            # Quality has the same length as the sequence. Consuming it by
            # length (not by content) keeps a quality line that begins with
            # "@" or contains "+" from being mistaken for a new record.
            remaining = len(sequence)
            while remaining > 0:
                quality = next(lines, None)
                if quality is None:
                    break  # truncated file: keep what was read so far
                remaining -= len(quality)

            # FASTA headers are introduced by ">".
            output.write('>' + header + '\n' + sequence + '\n')
    return
