# Strings, File I/O and Interactions

### Strings can do operations on themselves

    .lower(), .upper(), .capitalize()

In [4]:
"funKY tOwn".capitalize()

'Funky town'

In [6]:
"Funky tOwn".lower()

'funky town'

In [7]:
"fUNKY tOWN".swapcase()

'Funky Town'

How you call this:

 >.split([sep [,maxsplit]])

In [12]:
"funKY toWN".split()

['funKY', 'toWN']

In [9]:
"funKY tOwn".capitalize().split()

['Funky', 'town']

In [10]:
[x.capitalize() for x in "funKY tOwn".split()]

['Funky', 'Town']

In [13]:
"I want to take you to, funKY tOwn".split("u")

['I want to take yo', ' to, f', 'nKY tOwn']

In [14]:
"I want to take you to, funKY tOwn".split("you")

['I want to take ', ' to, funKY tOwn']

## .strip(), .join(), .replace()


In [17]:
csv_string = 'Dog, Cat, Spam, Conference, 2, 3.14   \n\t'
csv_string.strip()

'Dog, Cat, Spam, Conference, 2, 3.14'

In [18]:
a = 'spam\n'

In [19]:
a.strip('s')

'pam\n'

In [20]:
clean_list = [x.strip() for x in csv_string.split(',')]
print(clean_list)

['Dog', 'Cat', 'Spam', 'Conference', '2', '3.14']


### .join() allows you to glue a list of strings together with a certain string



In [24]:
print(', '.join(clean_list))

Dog, Cat, Spam, Conference, 2, 3.14


In [25]:
print('\t'.join(clean_list))

Dog	Cat	Spam	Conference	2	3.14


### .replace() strings in strings



In [27]:
csv_string = 'Dog, Cat, Spam, Conference, 2, 3.14   \n\t'
alt_csv = csv_string.replace(' ', '')
print(alt_csv)

Dog,Cat,Spam,Conference,2,3.14
	


In [29]:
print(csv_string.strip().replace(' ', '').replace(',', '\t'))

Dog	Cat	Spam	Conference	2	3.14


### .find()

incredibly useful searching, returning the index of the search

In [30]:
s = 'My funny Christmas'
s.find('y')

1

In [36]:
s.find?

In [34]:
s.find('funny')

3

In [35]:
s[s.find('funny'):]

'funny Christmas'

In [39]:
s[s.find('Christmas'):-3]

'Christ'

In [41]:
s.find('k')

-1

In [44]:
ss = [s, 'Argentine', 'American', 'Quarentine', 'Manafort']
for thestring in ss:
    if thestring.find('tine') != -1:
        print('' + str(thestring) + ' contains tine.')

Argentine contains tine.
Quarentine contains tine.


### string module

exposes useful variables and functions

In [45]:
import string

In [46]:
string.ascii_letters

'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [47]:
string.digits

'0123456789'

In [49]:
string.ascii_uppercase

'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

### String Formatting

casting using `str()` is very limited Python gives access to C-like string formatting

       usage:  “%(format)” % (variable)

In [53]:
import math
print('My favourite integer is %i and my favourite float is %f,\
 which to three decimal place is %0.3f and in exponential form is %e' \
     %(3, math.pi, math.pi, math.pi))

My favourite integer is 3 and my favourite float is 3.141593, which to three decimal place is 3.142 and in exponential form is 3.141593e+00


common formats:

    f (float), i (integer), s (string), g (nicely formatting floats)

http://docs.python.org/release/2.7.2/library/stdtypes.html#string-formatting-operations

 

### String Formatting

    % escapes “%”

In [55]:
print('I promised to give 100%% effort whenever asked of')

I promised to give 100%% effort whenever asked of


    + and zero-padding


In [57]:
print('%f\n%+f\n%f\n%10f\n%10s' %(math.pi, math.pi, -1.0 * math.pi, math.pi, 'pi'))

3.141593
+3.141593
-3.141593
  3.141593
        pi


### String Formatting

the (somewhat) preferred way

    is string.format(value0,value1,....)

In [59]:
'On {}, I feel {}' .format('Friday', 'groovy')

'On Friday, I feel Friday'

In [60]:
'on {0}, I feel {0}'.format("Saturday","groovy")

'on Saturday, I feel Saturday'

In [62]:
'on {0}, I feel {1}'.format("Saturday","groovy")

'on Saturday, I feel groovy'

In [63]:
'on {1}, I feel {0}'.format("Saturday","groovy")

'on groovy, I feel Saturday'

you can assign by argument position or by name



In [67]:
'{desire} to {place}'.format(desire = 'Fly me', 
                            place = 'The moon')

'Fly me to The moon'

In [69]:
'{desire} to {place} or else I wouldn\'t visit {place}'. format(
                                                    desire = 'Fly me',
                                                    place = 'The Moon')

"Fly me to The Moon or else I wouldn't visit The Moon"

In [72]:
f = {'desire': 'I want to take you', 'place': 'funky town'}
'{desire} to {place}'.format(**f)

'I want to take you to funky town'

### Formatting comes after a colon (:)


In [81]:
print('%03.2f' %3.14159)

3.14


In [88]:
('%03.2f' % 3.14159) == '{:03.2f}'.format(3.14159)

True

In [99]:
'{0:03.2f}'.format(3.14159, 42)

'3.14'

In [100]:
x = 10
strformat = '{1:<%i.2f}' %x
print(strformat)

{1:<10.2f}


In [101]:
strformat.format(3.14159, 42)

'42.00     '

In [102]:
# format also supports binary numbers
"int: {0:d};  hex: {0:x};  oct: {0:o};  bin: {0:b}".format(42)

'int: 42;  hex: 2a;  oct: 52;  bin: 101010'

In [103]:
'{0:b}'.format(15)

'1111'

## File I/O (read/write)

    .open() and .close() are builtin functions

In [104]:
%%file mydata.dat
This is my first file I/O. Zing!

Writing mydata.dat


In [105]:
file_stream = open('mydata.dat', 'r'); print(type(file_stream))
file_stream.close

<class '_io.TextIOWrapper'>


<function TextIOWrapper.close()>

    open modes: r (read), w (write), r+ (read + update), rb (read as a binary stream, ...), rt (read as text file)

    Writing data: .write() or .writelines()

In [106]:
f = open('test.dat', 'w')
f.write('This is my second I/O. Zang!')
f.close()
!cat test.dat

This is my second I/O. Zang!

In [108]:
f = open('test.dat', 'w')
f.writelines(["a = ['This is my third file I/O. Zang!']", ' Take that Dr'])
f.close()
!cat test.dat

a = ['This is my third file I/O. Zang!'] Take that Dr

    Likewise, there is .readlines() and .read()



In [110]:
f= open("mydata.dat","r")
data = f.readlines()
f.close() ; print(data)

['This is my first file I/O. Zing!']


In [112]:
type(data)

list

In [113]:
%%file tabbify_my_csv.py
"""
small copy program that turns a csv file into a tabbed file

"""
import os

def tabbify(infilename,outfilename,ignore_comments=True,comment_chars="#;/"):
    """
INPUT: infilename
OUTPUT: creates a file called outfilename
    """
    if not os.path.exists(infilename):
        return  # do nothing if the file isn't there
    f = open(infilename,"r")
    o = open(outfilename,"w")
    inlines = f.readlines() ; f.close()
    outlines = []
    for l in inlines:
        if ignore_comments and (l[0] in comment_chars):
            outlines.append(l)
        else:
            outlines.append(l.replace(",","\t"))
    o.writelines(outlines) ; o.close()

Writing tabbify_my_csv.py


In [114]:
!cat google_share_price.csv |head

# Date,Open,High,Low,Close,Volume,Adj Close
2008-10-14,393.53,394.50,357.00,362.71,7784800,362.71
2008-10-13,355.79,381.95,345.75,381.02,8905500,381.02
2008-10-10,313.16,341.89,310.30,332.00,10597800,332.00
2008-10-09,344.52,348.57,321.67,328.98,8075000,328.98
2008-10-08,330.16,358.99,326.11,338.11,11826400,338.11
2008-10-07,373.33,374.98,345.37,346.01,11054400,346.01
2008-10-06,373.98,375.99,357.16,371.21,11220600,371.21
2008-10-03,397.35,412.50,383.07,386.91,7992900,386.91
2008-10-02,409.79,409.98,386.00,390.49,5984900,390.49


In [115]:
%run tabbify_my_csv.py
tabbify("google_share_price.csv","google_share_price.tsv")

In [116]:
!cat google_share_price.csv |head

# Date,Open,High,Low,Close,Volume,Adj Close
2008-10-14,393.53,394.50,357.00,362.71,7784800,362.71
2008-10-13,355.79,381.95,345.75,381.02,8905500,381.02
2008-10-10,313.16,341.89,310.30,332.00,10597800,332.00
2008-10-09,344.52,348.57,321.67,328.98,8075000,328.98
2008-10-08,330.16,358.99,326.11,338.11,11826400,338.11
2008-10-07,373.33,374.98,345.37,346.01,11054400,346.01
2008-10-06,373.98,375.99,357.16,371.21,11220600,371.21
2008-10-03,397.35,412.50,383.07,386.91,7992900,386.91
2008-10-02,409.79,409.98,386.00,390.49,5984900,390.49


In [117]:
!cat google_share_price.tsv |head

# Date,Open,High,Low,Close,Volume,Adj Close
2008-10-14	393.53	394.50	357.00	362.71	7784800	362.71
2008-10-13	355.79	381.95	345.75	381.02	8905500	381.02
2008-10-10	313.16	341.89	310.30	332.00	10597800	332.00
2008-10-09	344.52	348.57	321.67	328.98	8075000	328.98
2008-10-08	330.16	358.99	326.11	338.11	11826400	338.11
2008-10-07	373.33	374.98	345.37	346.01	11054400	346.01
2008-10-06	373.98	375.99	357.16	371.21	11220600	371.21
2008-10-03	397.35	412.50	383.07	386.91	7992900	386.91
2008-10-02	409.79	409.98	386.00	390.49	5984900	390.49


# Advanced Interactions