*********************************************************************************************************
# A Tour of Python 3  
version 1.0.1  
Authors: Phil Pfeiffer, Zack Bunch, and Feyisayo Oyeniyi  
East Tennessee State University  
Last updated June 2021  
*********************************************************************************************************

# 9.  File Processing  
 9.1 [Overview](#File-Processing-Overview)  
 9.2 [Identifying the next byte to read and/or write](#File-Processing-Tell)  
 9.3 [Specifying the next location to read and/or write](#File-Processing-Seek)

##  9.1  Overview <a name='File-Processing-Overview'></a>

Python's run-time library includes routines for creating, reading, and deleting text (txt) and binary (bin) files, along with routines for checking a file's status. Python's status-checking routines include two functions for checking for a file's presence: `os.path.exists`, which checks whether a path denotes a file system object of any kind, and `os.path.isfile`, which checks whether a path denotes a file.

In [None]:
# 9.1.1.a showing the creation, reading, and deletion of a text file, along with logic to avoid
# overwriting an existing file system object and to confirm that file creation and deletion succeeded.
# version 1:  using close() to finish processing.
#             each close() sits in a 'finally' clause, so that the file is closed even when
#             a read or write fails part-way through.

# os - supporting Python library for operating system operations
## os.path.exists - check if a path expression denotes an object of some sort
## os.path.isfile - check if a path expression denotes a file
## os.remove - remove the specified file

import os

OUTFILE = 'data.txt'   # file to be written

if os.path.exists( OUTFILE ):
  # never overwrite an existing object of any kind (file, directory, ...) with this name
  print( f'{OUTFILE} already exists; please remove or rename it and rerun the example' )
else:
  print( f'creating a text file named {OUTFILE}' )
  try:
    # step 1: create and populate the file
    f = open( OUTFILE, 'w+' )
    try:
      f.write( 'Data science is evolving really fast.\n' )
      f.write( 'Python has become a de facto standard language for data analysis.\n' )
    finally:
      f.close( )                     # runs whether or not the writes succeeded
    # step 2: confirm that the file now exists, then echo it back one line at a time
    if os.path.isfile(OUTFILE):
      print( f'{OUTFILE} created - displaying contents\n' )
      f = open( OUTFILE, 'r' )
      try:
        while True:
          line = f.readline()
          if line == '': break       # readline() returns '' at end of file; a blank line is '\n'
          print( '>>  ', line, end='' )
      finally:
        f.close( )                   # runs whether or not the reads succeeded
    else:
      raise FileNotFoundError(OUTFILE)
    # step 3: remove the file and confirm that it is gone
    print( f'\ncleaning up - removing {OUTFILE}' )
    os.remove( OUTFILE )
    if os.path.exists(OUTFILE):
      raise FileExistsError(OUTFILE)
  except OSError as err:
    # open, read, write, and remove failures are all OSErrors, as are the two errors raised above.
    # str(err) is always a string (never None), so test it for emptiness before appending it.
    detail = str(err)
    print( f'could not access {OUTFILE}' + ( f': {detail}' if detail else '' ) )

In [None]:
# 9.1.1.b showing the creation, reading, and deletion of a text file, along with logic to avoid
# overwriting an existing file system object and to confirm that file creation and deletion succeeded.
# version 2:  using 'with' to process the file, which eliminates the need for close().
#             'with' closes the file on leaving its block, whether the block ends normally or
#             because of an exception.

# os - supporting Python library for operating system operations
## os.path.exists - check if a path expression denotes an object of some sort
## os.path.isfile - check if a path expression denotes a file
## os.remove - remove the specified file

import os

OUTFILE = 'data.txt'   # file to be written

if os.path.exists( OUTFILE ):
  # never overwrite an existing object of any kind (file, directory, ...) with this name
  print( f'{OUTFILE} already exists; please remove or rename it and rerun the example' )
else:
  print( f'creating a text file named {OUTFILE}' )
  try:
    # step 1: create and populate the file
    with open( OUTFILE, 'w+' ) as f:
      f.write( 'Data science is evolving really fast.\n' )
      f.write( 'Python has become a de facto standard language for data analysis.\n' )
    # step 2: confirm that the file now exists, then echo it back one line at a time
    if os.path.isfile(OUTFILE):
      print( f'{OUTFILE} created - displaying contents\n' )
      with open( OUTFILE, 'r' ) as f:
        while True:
          line = f.readline()
          if line == '': break       # readline() returns '' at end of file; a blank line is '\n'
          print( '>>  ', line, end='' )
    else:
      raise FileNotFoundError(OUTFILE)
    # step 3: remove the file and confirm that it is gone
    print( f'\ncleaning up - removing {OUTFILE}' )
    os.remove( OUTFILE )
    if os.path.exists(OUTFILE):
      raise FileExistsError(OUTFILE)
  except OSError as err:
    # open, read, write, and remove failures are all OSErrors, as are the two errors raised above.
    # str(err) is always a string (never None), so test it for emptiness before appending it.
    detail = str(err)
    print( f'could not access {OUTFILE}' + ( f': {detail}' if detail else '' ) )

In [None]:
# 9.1.2 showing the creation, reading, and deletion of a binary file, along with logic to avoid
# overwriting an existing file system object and to confirm that file creation and deletion succeeded.
# version 1:  using close() to finish processing.
#             each close() sits in a 'finally' clause, so that the file is closed even when
#             a read or write fails part-way through.

# os - supporting Python library for operating system operations
## os.path.exists - check if a path expression denotes an object of some sort
## os.path.isfile - check if a path expression denotes a file
## os.remove - remove the specified file

import os

OUTFILE = 'data.bin'   # file to be written

if os.path.exists( OUTFILE ):
  # never overwrite an existing object of any kind (file, directory, ...) with this name
  print( f'{OUTFILE} already exists; please remove or rename it and rerun the example' )
else:
  print( f'creating a binary file named {OUTFILE}' )
  try:
    # step 1: create the file and write five bytes, with values 1 through 5
    f = open( OUTFILE, 'wb' )
    try:
      f.write( bytearray( [1, 2, 3, 4, 5]) )
    finally:
      f.close( )                     # runs whether or not the write succeeded
    # step 2: confirm that the file now exists, then read it back one byte at a time
    if os.path.isfile(OUTFILE):
      print( f'{OUTFILE} created - displaying contents\n' )
      f = open( OUTFILE, 'rb' )
      f_contents = []
      try:
        while True:
          nextitem = f.read(1)
          if nextitem == b'': break          # read() returns an empty bytes object at end of file
          f_contents.append( nextitem[0] )   # indexing a bytes object yields an int
      finally:
        f.close( )                   # runs whether or not the reads succeeded
      print( ', '.join( map( str, f_contents ) ) )
    else:
      raise FileNotFoundError(OUTFILE)
    # step 3: remove the file and confirm that it is gone
    print( f'\ncleaning up - removing {OUTFILE}' )
    os.remove( OUTFILE )
    if os.path.exists(OUTFILE):
      raise FileExistsError(OUTFILE)
  except OSError as err:
    # open, read, write, and remove failures are all OSErrors, as are the two errors raised above.
    # str(err) is always a string (never None), so test it for emptiness before appending it.
    detail = str(err)
    print( f'could not access {OUTFILE}' + ( f': {detail}' if detail else '' ) )

<span style='color:blue'>&#128073;&ensp;&ensp;**Exercise 9.1.1:**

</span><span style='color:navy'>In the following code cell, redo the previous example, using Python's `with` statement to eliminate the calls to f.close().</span>

<span style='color:blue'>&#128073;&ensp;&ensp;**Exercise 9.1.2:**

</span><span style='color:navy'>Jupyter Notebook's kernel was developed by the [Interactive Python Project](https://ipython.org). This kernel supports a collection of "magic" commands: extensions to the standard Python interpreter whose names start with `%` (line-oriented commands) or `%%` (cell-oriented commands). Can any of the line-oriented commands be used in place of Python's file primitives to create and populate these exercises' files? What about cell-oriented commands?  Explain. </span>
***


***


Python's modes for opening files include the following:  

 &ensp;&ensp; `r` - opens an existing file in read-only mode<br>
 &ensp;&ensp; `r+` - opens an existing file in read and write modes, preserving its contents <br>
 &ensp;&ensp; `rb` - opens a file in binary and read-only modes <br> 
 &ensp;&ensp; `w` -  opens a file in write-only mode, creating the file if it is missing and discarding any existing contents <br> 
 &ensp;&ensp; `w+` - opens a file in read and write modes, creating the file if it is missing and discarding any existing contents <br>
 &ensp;&ensp; `wb` - opens a file for writing in binary format


##  9.2  Identifying the next byte to read and/or write <a name='File-Processing-Tell'></a>

Python uses a cursor called the file pointer to track the next byte to read and/or write. 
`tell()` returns the file pointer's current position in a file.


In [None]:
# 9.2 showing the use of tell to track progress through a file.
# tell() is called before each readline(), so each line is labeled with the position at which it starts.
# the loop uses readline() rather than 'for line in f', since tell() is disabled during file iteration.

import os

OUTFILE = 'data.txt'   # file to be written

if os.path.exists( OUTFILE ):
  # never overwrite an existing object of any kind (file, directory, ...) with this name
  print( f'{OUTFILE} already exists; please remove or rename it and rerun the example' )
else:
  print( f'creating a text file named {OUTFILE}' )
  try:
    # step 1: create and populate the file
    with open( OUTFILE, 'w+' ) as f:
      f.write( 'Data science is evolving really fast.\n' )
      f.write( 'Python has become a de facto standard language for data analysis.\n' )
    # step 2: confirm that the file now exists, then echo it back, showing where each line starts
    if os.path.isfile(OUTFILE):
      print( f'{OUTFILE} created - displaying contents\n' )
      with open( OUTFILE, 'r' ) as f:
        while True:
          start_position = f.tell()    # position of the next item to read
          line = f.readline()
          if line == '':               # readline() returns '' at end of file
            print( f'at char {start_position}>>   <EOF>' )
            break
          print( f'at char {start_position}>>   ', line, end='' )
    else:
      raise FileNotFoundError(OUTFILE)
    # step 3: remove the file and confirm that it is gone
    print( f'\ncleaning up - removing {OUTFILE}' )
    os.remove( OUTFILE )
    if os.path.exists(OUTFILE):
      raise FileExistsError(OUTFILE)
  except OSError as err:
    # open, read, write, and remove failures are all OSErrors, as are the two errors raised above.
    # str(err) is always a string (never None), so test it for emptiness before appending it.
    detail = str(err)
    print( f'could not access {OUTFILE}' + ( f': {detail}' if detail else '' ) )

##  9.3  Specifying the next location to read and/or write <a name='File-Processing-Seek'></a>

`seek()` changes a file's next byte position to the value of its argument. `seek(0)` seeks to the start of a file. `seek(n, 1)` moves the current position n bytes, relative to the current file position. `seek(n, 1)` with a nonzero offset n, including a negative (i.e., backward) offset, only works when a file is opened in binary mode.

These examples print a file's contents in reverse order. They use the `st_size` field from the descriptor returned by `os.stat` to determine a file's size, together with seek() to effectively move backwards through a file.

In [None]:
# 9.3.1 showing the use of repeated seek()s to move through a binary file in reverse order
# the file is read from its final byte to its first: each pass reads one byte, which advances
# the file position by one, then seeks two bytes backward to reach the preceding byte.

import os

OUTFILE = 'data.bin'   # file to be written

if os.path.exists( OUTFILE ):
  # never overwrite an existing object of any kind (file, directory, ...) with this name
  print( f'{OUTFILE} already exists; please remove or rename it and rerun the example' )
else:
  print( f'creating a binary file named {OUTFILE}' )
  try:
    # step 1: create the file and write five bytes, with values 1 through 5.
    # 'with' closes the file even if the write fails.
    with open( OUTFILE, 'wb' ) as f:
      f.write( bytearray( [1, 2, 3, 4, 5]) )
    # step 2: confirm that the file now exists, then collect its bytes from last to first
    if os.path.isfile(OUTFILE):
      print( f'{OUTFILE} created - displaying contents\n' )
      f_contents = []
      with open( OUTFILE, 'rb' ) as f:
        f.seek( os.stat(f.fileno()).st_size -1 )    # seek to the file's final byte
        while True:
          f_contents.append( f.read(1)[0] )        # reading one byte moves the position forward by one
          if f.tell() == 1:  break                 # the byte just read was the file's first byte
          f.seek(-2, 1)                            # back up two bytes: one to undo the read, one to reach the previous byte
      print( ', '.join( map( str, f_contents ) ) )
    else:
      raise FileNotFoundError(OUTFILE)
    # step 3: remove the file and confirm that it is gone
    print( f'\ncleaning up - removing {OUTFILE}' )
    os.remove( OUTFILE )
    if os.path.exists(OUTFILE):
      raise FileExistsError(OUTFILE)
  except OSError as err:
    # open, read, write, seek, and remove failures are all OSErrors, as are the two errors raised above.
    # str(err) is always a string (never None), so test it for emptiness before appending it.
    detail = str(err)
    print( f'could not access {OUTFILE}' + ( f': {detail}' if detail else '' ) )

In [None]:
# 9.3.2  showing the use of repeated seek()s to move through a text file in reverse order
# the file is read one character at a time, from its final byte to its first, using absolute seeks.
# this byte-by-byte approach assumes one byte per character, which holds for this example's ASCII text.

import os

OUTFILE = 'data.txt'   # file to be written

if os.path.exists( OUTFILE ):
  # never overwrite an existing object of any kind (file, directory, ...) with this name
  print( f'{OUTFILE} already exists; please remove or rename it and rerun the example' )
else:
  print( f'creating a text file named {OUTFILE}' )
  try:
    # step 1: create and populate the file; note the blank line before the final line
    with open( OUTFILE, 'w+' ) as f:
      f.write('Department of Computing\n')
      f.write('East Tennessee State University\n')
      f.write('Johnson City, TN\n')
      f.write('\n')
      f.write('Attn: Phil Pfeiffer\n')
    # step 2: confirm that the file now exists, then collect its characters from last to first
    if os.path.isfile(OUTFILE):
      print( f'{OUTFILE} created - displaying contents in reverse order\n' )
      reversed_chars = []
      # newline='' turns off end-of-line translation, so that a Windows <CR><LF> pair is read as
      # two distinct characters, '\r' and '\n', instead of appearing as '\n' at both byte positions
      with open( OUTFILE, 'r', newline='' ) as f:
        last_byte = os.stat(f.fileno()).st_size -1   # os.stat accepts a path or an open file descriptor
        for this_byte in range( last_byte, -1, -1 ):
          f.seek(this_byte)   # absolute seek, so no need to return to the start of the file first
          this_char = f.read(1)
          if this_char != '\r':
            # dropping only the <CR> keeps every <LF>, so blank lines print correctly on all platforms
            reversed_chars.append( this_char )
      print( ''.join(reversed_chars), end='' )
    else:
      raise FileNotFoundError(OUTFILE)
    # step 3: remove the file and confirm that it is gone
    print( f'\n\ncleaning up - removing {OUTFILE}' )
    os.remove( OUTFILE )
    if os.path.exists(OUTFILE):
      raise FileExistsError(OUTFILE)
  except OSError as err:
    # open, read, write, seek, and remove failures are all OSErrors, as are the two errors raised above.
    # str(err) is always a string (never None), so test it for emptiness before appending it.
    detail = str(err)
    print( f'could not access {OUTFILE}' + ( f': {detail}' if detail else '' ) )

<span style='color:blue'>&#128073;&ensp;&ensp;**Exercise 9.3.1:**

</span><span style='color:navy'>In the following code cell, modify the previous example to obtain a program that prints a text file's lines in reverse order, where each line's content is displayed in normal order. Use the Python library's `seek` method to successively identify line breaks, adding a line break after the file's last line if the file ends without a break.</span>

<span style='color:blue'>&#128073;&ensp;&ensp;**Exercise 9.3.2:**

</span><span style='color:navy'>In the following markdown cell, account for why Python's file processing routines, like other languages' routines, limit reverse seeks to binary files. Hint: search for a reference on UTF-8 and other standards for character encoding.</span>
***


***
