In [28]:
from __future__ import (absolute_import, division, print_function,
                    unicode_literals)
import os
from datetime import datetime
import sys

from caom2 import SimpleObservation, Plane, Artifact, Part, Chunk,\
                  TypedOrderedDict, ObservationWriter, ProductType, \
                  ReleaseType, TypedList, Target, TargetPosition, \
                  TargetType, ObservationIntentType, Instrument, \
                  Telescope, Environment, DataProductType, Provenance, \
                  CalibrationLevel, Metrics, Proposal, Point, Slice, Axis,\
                  ObservableAxis, CoordAxis1D, CoordAxis2D, SpatialWCS,\
                  SpectralWCS, EnergyTransition, TemporalWCS, CoordFunction1D,\
                  RefCoord, PolarizationWCS

from checksumdir import dirhash
import casatools
import numpy

msmd = casatools.msmetadata()
ms = casatools.ms()



In [13]:
ms_file = '/home/h14471mj/e-merlin/casa6_docker/prod/TS8004_C_001_20190801/TS8004_C_001_20190801_avg.ms/'

In [4]:
def basename(name):
    base_name = os.path.dirname(name).split('/')[-1]
    return base_name

In [6]:
def find_mssources(ms_file):
    # Get list of sources from measurement set
    # To do: discern target and calibrators for CAOM Observation.targetName
    msmd.open(ms_file)
    mssources = ','.join(numpy.sort(msmd.fieldnames()))
    #mssources = msmd.fieldnames()
    msmd.done()
    # logger.debug('Sources in MS {0}: {1}'.format(msfile, mssources))
    return mssources

In [7]:
def get_obs_name(ms_file):
    msmd.open(ms_file)
    obs_name = msmd.observatorynames()
    msmd.done()
    return obs_name[0]

In [25]:
def get_size(start_path = '.'):
    total_size = 0
    for dirpath, dirnames, filenames in os.walk(start_path):
        for f in filenames:
            fp = os.path.join(dirpath, f)
            # skip if it is symbolic link
            if not os.path.islink(fp):
                total_size += os.path.getsize(fp)

    return total_size



In [30]:
class FileInfo:
    """
    Container for the metadata of a file:
        - ID
        - size
        - name
        - md5sum
        - file_type
        - encoding
    """
    def __init__(self, id, size=None, name=None, md5sum=None, lastmod=None,
                 file_type=None, encoding=None):
        if not id:
            raise AttributeError(
                'ID of the file in Storage Inventory is required')
        self.id = id
        self.size = size
        self.name = name
        self.md5sum = md5sum
        self.lastmod = lastmod
        self.file_type = file_type
        self.encoding = encoding

    def __str__(self):
        return (
            'id={}, name={}, size={}, type={}, encoding={}, last modified={}, '
            'md5sum={}'.format(self.id, self.name, self.size, self.file_type,
                               self.encoding, date2ivoa(self.lastmod),
                               self.md5sum))

In [26]:
def get_file_type(fqn):
    """Basic header extension to content_type lookup."""
    lower_fqn = fqn.lower()
    if os.path.isdir(fqn):
        return 'application/measurement-set'
    elif lower_fqn.endswith('.fits') or lower_fqn.endswith('.fits.fz') or lower_fqn.endswith('.fits.bz2'):
        return 'application/fits'
    elif lower_fqn.endswith('.gif'):
        return 'image/gif'
    elif lower_fqn.endswith('.png'):
        return 'image/png'
    elif lower_fqn.endswith('.jpg'):
        return 'image/jpeg'
    elif lower_fqn.endswith('.tar.gz'):
        return 'application/x-tar'
    elif lower_fqn.endswith('.csv'):
        return 'text/csv'
    elif lower_fqn.endswith('.hdf5') or fqn.endswith('.h5'):
        return 'application/x-hdf5'
    else:
        return 'text/plain'

In [8]:
def get_local_file_info(fqn):
    """
    Gets descriptive metadata for a directory of measurement set files on disk.
    :param fqn: Fully-qualified name of the file on disk.
    :return: FileInfo, no scheme on the md5sum value.
    """
    file_type_local = get_file_type(fqn)

    if file_type_local == 'application/measurement-set':
        file_size = get_size(fqn)
        final_hash_val = dirhash(fqn)  # very slow, may need to remove in future
        file_id = os.path.dirname(fqn).split('/')[-1]

    else:
        file_id = os.path.basename(fqn)
        s = os.stat(fqn)
        file_size = s.st_size
        hash_md5 = md5()
        with open(fqn, 'rb') as f:
            for chunk in iter(lambda: f.read(4096), b''):
                hash_md5.update(chunk)
        final_hash_val = hash_md5.hexdigest()

    meta = FileInfo(
        id=file_id,
        size=file_size,
        md5sum=final_hash_val,
        file_type=file_type_local,
    )
    return meta


In [10]:
def get_scan_sum(ms_file):
    ms.open(ms_file)
    scan_sum = ms.getscansummary()
    ms.close()
    return scan_sum

In [23]:
def create_xml_from_file(ms_file):
    
    obs_id = basename(ms_file)
    observation = SimpleObservation('collection', obs_id)
    observation.obs_type = 'science'
    observation.intent = ObservationIntentType.SCIENCE
    
    observation.target = Target('TBD')
    #observation.target_position = TargetPosition(str(find_mssources(ms_file)), 'J2000')
    
    observation.telescope = Telescope(get_obs_name(ms_file)[0])
    
    observation.planes = TypedOrderedDict(Plane)
    plane = Plane(obs_id) # use for now
    observation.planes[obs_id] = plane
    
    plane.artifacts = TypedOrderedDict(Artifact)
    artifact = Artifact('uri:foo/bar', ProductType.SCIENCE, ReleaseType.META)
    plane.artifacts['uri:foo/bar'] = artifact
    
    meta_data = get_local_file_info(ms_file)
    
    artifact.content_type = meta_data.file_type
    artifact.content_length = meta_data.size
    artifact.content_checksum = meta_data.md5sum
    
    artifact.parts = TypedOrderedDict(Part)
    part = Part('name')
    artifact.parts['name'] = part
    part.product_type = ProductType.SCIENCE
    
    part.chunks = TypedList(Chunk)
    
    ms_summary_data = get_scan_sum(ms_file)
    
    for key, value in ms_summary_data.items():
        chunk_dict = value['0']
        chunk = Chunk()
        part.chunks.append(chunk)
        time_axis = CoordAxis1D(Axis('MJD', 's'))
        chunk.time = TemporalWCS(time_axis)
        chunk.time.exposure = ms_summary_data['IntegrationTime']
        #         chunk.time.range.start.val = meta_data['BeginTime']
        #         chunk.time.range.end.val = meta_data['EndTime']
    writer = ObservationWriter()
    writer.write(observation, sys.stdout)

In [31]:
create_xml_from_file(ms_file)

TypeError: Expected <class 'caom2.common.ChecksumURI'> for checksum_uri, received <class 'str'>

In [5]:
observation = SimpleObservation('collection', 'observationID')
observation.obs_type = 'science'
observation.intent = ObservationIntentType.SCIENCE
# observation.meta_release = 

In [None]:
observation.target = Target(str(find_mssources(ms_file)))