# Make FITS Summary

This notebook defines a function called ``make_summary``, which builds a summary table of selected header keywords for all the FITS files given to it.

It was written partly as TA material for Seoul National University's Astronomical Observation class.

In [1]:
'''
Made by Y. P. Bach (2017 Oct)
modified 2017-11-23
TA material for Seoul National University's Astronomical Observation class.
It reads the header of each FITS file and extracts some keywords.
This is easily modifiable for different observatories and different header 
formats (e.g., multi extension fits (MEF), missing header keywords). 
'''
import os
#import glob
#import numpy as np
from astropy.table import Table, Column
#from ccdproc import ImageFileCollection
from astropy.io import fits


def make_summary(filelist, location='.', fname_option='relative',
                 save_summary='', save_format='ascii.csv',
                 keywords=[], dtypes=[], chmod=777,
                 example_header='', sort_by='file', verbose=True):
    """
    Build a summary table of selected header keywords of FITS files.

    For every file in ``filelist`` the file permission is changed to
    ``chmod``, the header is read, and the values of ``keywords`` are
    collected into one row of an ``astropy.table.Table``. The table is
    optionally sorted and written to ``save_summary``.

    filelist: list of str
        The list of file paths. Each path is opened exactly as given.

    location: str or path-like
        The top directory you want to use. NOTE: this argument is accepted
        for interface compatibility but is currently not used; the paths in
        ``filelist`` are not joined to it.

    fname_option: str {'relative', 'basename'}
        Whether to save the path as given in ``filelist`` (``'relative'``)
        or only the file name (``'basename'``) in the ``'file'`` column.

    save_summary: str or path-like
        The directory and file name of the output summary file. Leave blank
        for not saving anything.

    save_format: str
        The astropy.table.Table output format.

    keywords: list
        The list of the keywords to extract (keywords should be in ``str``).

    dtypes: list
        The list of dtypes of keywords if you want to specify. If ``[]``,
        ``['U80'] * len(keywords)`` will be used. Otherwise, it should have
        the same length with ``keywords``.

    chmod: int
        The chmod code written with octal digits, as on the command line
        (e.g., 777 for ``rwxrwxrwx``, 644 for ``rw-r--r--``).

    example_header: str or path-like
        The path including the filename of the output header text file,
        which holds the header of the first file in ``filelist``. No file
        will be saved if ``example_header=''``.

    sort_by: str
        The column name to sort the results. It can be any element of
        ``keywords`` or ``'file'``, which sorts the table by the file name.
        Use ``''`` or ``None`` to keep the order of ``filelist``.

    verbose: bool
        Whether to print progress messages.

    Returns
    -------
    save_summary: str or path-like
        The ``save_summary`` argument, returned unchanged.

    Raises
    ------
    KeyError
        If ``fname_option`` is neither ``'relative'`` nor ``'basename'``.
    ValueError
        If ``chmod`` is not made of octal digits, or if a missing keyword
        cannot be filled with ``'--'`` because its dtype is not a string.
    """
    if fname_option not in ('relative', 'basename'):
        raise KeyError("fname_option must be either 'relative' or 'basename'.")

    # ``os.chmod`` needs the numeric mode, so the documented 777 has to be
    # read as the octal number 0o777 (decimal 777 would be mode 0o1411).
    try:
        mode = int(str(chmod), 8)
    except ValueError:
        raise ValueError(
            "chmod must be written with octal digits (e.g., 777 or 644); "
            "got {!r}.".format(chmod)) from None

    # Work on copies so the (shared) default lists are never touched.
    keywords = list(keywords)
    if len(dtypes) == 0:
        # FITS header MUST be within 80 characters! (FITS standard)
        dtypes = ['U80'] * len(keywords)
    else:
        dtypes = list(dtypes)

    if verbose:
        print("Extracting keys: ", keywords)

    # Save example header
    if example_header != "":
        example_fits = filelist[0]
        if verbose:
            print("Extract example header from {:s}".format(str(example_fits)))
            print("and save as {:s}".format(str(example_header)))
        ex_hdr = fits.getheader(example_fits)
        ex_hdr.totextfile(example_header, overwrite=True)

    # Initialize
    summarytab = Table(names=keywords, dtype=dtypes)
    fnames = []

    # Run through all the fits files
    for fitsfile in filelist:
        os.chmod(fitsfile, mode)
        if fname_option == 'relative':
            fnames.append(fitsfile)
        else:
            fnames.append(os.path.basename(fitsfile))

        hdr = fits.getheader(fitsfile)
        row = []
        missing = []
        for key in keywords:
            try:
                row.append(hdr[key])
            except KeyError:
                if verbose:
                    print("Key {:s} not found for {:s}, filling with '--'."
                          .format(key, str(fitsfile)))
                row.append('--')
                missing.append(key)

        # The '--' filler can only be rejected here, when the row is
        # converted to the column dtypes.
        try:
            summarytab.add_row(row)
        except ValueError as err:
            if missing:
                raise ValueError(
                    "Please use 'U80' as the dtype for the key(s) {:s}."
                    .format(', '.join(missing))) from err
            raise

    # Attach the file name as the first column.
    summarytab.add_column(Column(data=fnames, name='file'), index=0)

    # Sort by a key only if ``sort_by`` is given.
    if sort_by:
        summarytab.sort(sort_by)

    if save_summary != '':
        if verbose:
            print('Saving the summary file to "{:s}"'.format(str(save_summary)))
        summarytab.write(save_summary, format=save_format, overwrite=True)

    return save_summary

## Usage Example
For the FITS files in ``./data/*.fits``, I will extract the header keywords listed in the variable ``keywords`` below. I will not specify the data types, i.e., the function automatically regards all values as unicode strings of up to 80 characters (``'U80'``).

```python
import glob
import os

# Header keywords to collect for every file.
keywords = ['DATE-OBS', 'UT', 'EXPTIME', 'MJD', 'OBSERVAT', 'NAXIS1', 'NAXIS2', 'OBJECT', 'FILTER', 'AIRMASS']

# All FITS files directly under ./data
allfits = glob.glob(os.path.join('data', '*.fits'))

# Summarize the first three files, save the table as summary.csv and the
# header of the first file as ex_hdr.txt.
make_summary(allfits[:3],
             location='.',
             keywords=keywords,
             fname_option='relative',
             save_summary='summary.csv',
             save_format='ascii.csv',
             sort_by='file',
             chmod=777,
             example_header='ex_hdr.txt')
```

The result can be, for example, a csv file like this:
```
file,DATE-OBS,UT,EXPTIME,MJD,OBSERVAT,NAXIS1,NAXIS2,OBJECT,FILTER,AIRMASS
data/test1.fits,2016-06-02,11:41:24.94,120.0,57541.487094,OAO/NAOJ,1024,1024,1984QY1_05,R,1.167
data/test2.fits,2016-06-02,13:52:11.04,120.0,57541.577906,OAO/NAOJ,1024,1024,1984QY1_11,R,1.2876
data/test3.fits,2016-06-02,16:32:13.93,120.0,57541.68905,OAO/NAOJ,1024,1024,1984QY1_19,R,1.7533
```
In a more readable form,

In [13]:
import os
import pandas as pd

# Load the example summary table and print it as plain text.
summary_path = os.path.join('data', 'summary_example.csv')
data = pd.read_csv(summary_path, sep=',')
print(data)

              file    DATE-OBS           UT  EXPTIME           MJD  OBSERVAT  \
0  data/test1.fits  2016-06-02  11:41:24.94    120.0  57541.487094  OAO/NAOJ   
1  data/test2.fits  2016-06-02  13:52:11.04    120.0  57541.577906  OAO/NAOJ   
2  data/test3.fits  2016-06-02  16:32:13.93    120.0  57541.689050  OAO/NAOJ   

   NAXIS1  NAXIS2      OBJECT FILTER  AIRMASS  
0    1024    1024  1984QY1_05      R   1.1670  
1    1024    1024  1984QY1_11      R   1.2876  
2    1024    1024  1984QY1_19      R   1.7533  


In [14]:
from IPython.display import display, HTML
# Convert the DataFrame ``data`` to an HTML table; as the last expression of
# the notebook cell, the resulting HTML object is what the cell displays.
HTML(data.to_html())

Unnamed: 0,file,DATE-OBS,UT,EXPTIME,MJD,OBSERVAT,NAXIS1,NAXIS2,OBJECT,FILTER,AIRMASS
0,data/test1.fits,2016-06-02,11:41:24.94,120.0,57541.487094,OAO/NAOJ,1024,1024,1984QY1_05,R,1.167
1,data/test2.fits,2016-06-02,13:52:11.04,120.0,57541.577906,OAO/NAOJ,1024,1024,1984QY1_11,R,1.2876
2,data/test3.fits,2016-06-02,16:32:13.93,120.0,57541.68905,OAO/NAOJ,1024,1024,1984QY1_19,R,1.7533
