In [6]:
import os
import json
import numpy as np
import xarray as xr
import pandas as pd
from pathlib import Path
import warnings

# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation and data-decoding warnings from
# the libraries imported above -- confirm that is intended.
warnings.filterwarnings('ignore')

class ArgoFloatProcessor:
    def __init__(self, data_directory):
        self.data_directory = Path(data_directory)
        self.float_data = {}
        
    def determine_location(self, lat, lon):
        """Determine ocean region based on latitude and longitude"""
        if pd.isna(lat) or pd.isna(lon):
            return "Unknown"
        
        # Bay of Bengal: roughly 5-22°N, 80-100°E
        if 5 <= lat <= 22 and 80 <= lon <= 100:
            return "Bay of Bengal"
        
        # Arabian Sea: roughly 0-30°N, 50-80°E
        elif 0 <= lat <= 30 and 50 <= lon <= 80:
            return "Arabian Sea"
        
        # Indian Ocean: broader region
        elif -50 <= lat <= 30 and 20 <= lon <= 120:
            return "Indian Ocean"
        
        else:
            return "Other"
    
    def safe_float_conversion(self, value):
        """Safely convert value to float, return NaN if not possible"""
        try:
            if pd.isna(value):
                return float('nan')
            return float(value)
        except (ValueError, TypeError):
            return float('nan')
    
    def safe_str_conversion(self, value):
        """Safely convert value to string"""
        try:
            if pd.isna(value):
                return ""
            if isinstance(value, bytes):
                return value.decode('utf-8').strip()
            return str(value).strip()
        except:
            return ""
    
    def calculate_stats(self, data):
        """Calculate min, max, avg from data array"""
        try:
            if data is None or len(data) == 0:
                return float('nan'), float('nan'), float('nan')
            
            # Flatten the data if it's multi-dimensional
            flat_data = np.array(data).flatten()
            
            # Remove NaN values
            valid_data = flat_data[~np.isnan(flat_data)]
            
            if len(valid_data) == 0:
                return float('nan'), float('nan'), float('nan')
            
            return float(np.min(valid_data)), float(np.max(valid_data)), float(np.mean(valid_data))
        except:
            return float('nan'), float('nan'), float('nan')
    
    def process_meta_file(self, file_path, float_id):
        """Process meta.nc file"""
        try:
            with xr.open_dataset(file_path) as ds:
                meta_info = {
                    'platform_number': self.safe_str_conversion(ds.PLATFORM_NUMBER.values),
                    'wmo_inst_type': self.safe_str_conversion(ds.WMO_INST_TYPE.values),
                    'project_name': self.safe_str_conversion(ds.PROJECT_NAME.values),
                    'pi_name': self.safe_str_conversion(ds.PI_NAME.values),
                    'data_centre': self.safe_str_conversion(ds.DATA_CENTRE.values),
                    'launch_info': {
                        'date': self.safe_str_conversion(ds.LAUNCH_DATE.values),
                        'latitude': self.safe_float_conversion(ds.LAUNCH_LATITUDE.values),
                        'longitude': self.safe_float_conversion(ds.LAUNCH_LONGITUDE.values),
                        'platform_type': self.safe_str_conversion(ds.PLATFORM_TYPE.values),
                        'float_serial_no': self.safe_str_conversion(ds.FLOAT_SERIAL_NO.values),
                        'deployment_platform': self.safe_str_conversion(ds.DEPLOYMENT_PLATFORM.values) if 'DEPLOYMENT_PLATFORM' in ds else "",
                        'deployment_cruise_id': self.safe_str_conversion(ds.DEPLOYMENT_CRUISE_ID.values) if 'DEPLOYMENT_CRUISE_ID' in ds else ""
                    },
                    'technical_info': {
                        'battery_type': self.safe_str_conversion(ds.BATTERY_TYPE.values) if 'BATTERY_TYPE' in ds else "",
                        'battery_packs': self.safe_str_conversion(ds.BATTERY_PACKS.values) if 'BATTERY_PACKS' in ds else "",
                        'controller_board_type_primary': self.safe_str_conversion(ds.CONTROLLER_BOARD_TYPE_PRIMARY.values) if 'CONTROLLER_BOARD_TYPE_PRIMARY' in ds else "",
                        'firmware_version': self.safe_str_conversion(ds.FIRMWARE_VERSION.values),
                        'sensors': []
                    }
                }
                
                # Extract sensor information
                if 'SENSOR' in ds:
                    sensors = ds.SENSOR.values
                    sensor_makers = ds.SENSOR_MAKER.values if 'SENSOR_MAKER' in ds else []
                    sensor_models = ds.SENSOR_MODEL.values if 'SENSOR_MODEL' in ds else []
                    sensor_serials = ds.SENSOR_SERIAL_NO.values if 'SENSOR_SERIAL_NO' in ds else []
                    
                    for i, sensor in enumerate(sensors):
                        sensor_info = {
                            'name': self.safe_str_conversion(sensor),
                            'maker': self.safe_str_conversion(sensor_makers[i]) if i < len(sensor_makers) else "",
                            'model': self.safe_str_conversion(sensor_models[i]) if i < len(sensor_models) else "",
                            'serial_no': self.safe_str_conversion(sensor_serials[i]) if i < len(sensor_serials) else ""
                        }
                        meta_info['technical_info']['sensors'].append(sensor_info)
                
                # Determine location
                lat = meta_info['launch_info']['latitude']
                lon = meta_info['launch_info']['longitude']
                meta_info['location'] = self.determine_location(lat, lon)
                
                return meta_info
        except Exception as e:
            print(f"Error processing meta file {file_path}: {e}")
            return None
    
    def process_profile_files(self, float_dir, float_id):
        """Process all profile files (prof.nc and cycle files) for a float"""
        all_data = {
            'temp': [], 'psal': [], 'pres': [], 'doxy': [],
            'fluorescence_chla': [], 'bbp700': [], 'nitrate': [],
            'ph': [], 'turbidity': [], 'cdom': []
        }
        cycle_count = 0
        
        # Process main profile file
        prof_file = float_dir / f"{float_id}_prof.nc"
        if prof_file.exists():
            try:
                with xr.open_dataset(prof_file) as ds:
                    cycle_count += len(ds.N_PROF) if 'N_PROF' in ds.dims else 0
                    
                    # Extract data for each parameter
                    for param in ['TEMP', 'PSAL', 'PRES']:
                        if param in ds:
                            data = ds[param].values
                            all_data[param.lower()].extend(data.flatten())
            except Exception as e:
                print(f"Error processing profile file {prof_file}: {e}")
        
        # Process cycle files (e.g., *_001.nc, *_002.nc, etc.)
        cycle_files = list(float_dir.glob(f"{float_id}_[0-9][0-9][0-9].nc"))
        cycle_count += len(cycle_files)
        
        for cycle_file in cycle_files:
            try:
                with xr.open_dataset(cycle_file) as ds:
                    # Map of parameter names to our keys
                    param_mapping = {
                        'TEMP': 'temp',
                        'PSAL': 'psal', 
                        'PRES': 'pres',
                        'DOXY': 'doxy',
                        'FLUORESCENCE_CHLA': 'fluorescence_chla',
                        'BBP700': 'bbp700',
                        'NITRATE': 'nitrate',
                        'PH_IN_SITU_TOTAL': 'ph',
                        'TURBIDITY': 'turbidity',
                        'CDOM': 'cdom'
                    }
                    
                    for nc_param, key in param_mapping.items():
                        if nc_param in ds:
                            data = ds[nc_param].values
                            all_data[key].extend(data.flatten())
            except Exception as e:
                print(f"Error processing cycle file {cycle_file}: {e}")
        
        # Calculate statistics for each parameter
        stats = {}
        for param, data in all_data.items():
            min_val, max_val, avg_val = self.calculate_stats(data)
            stats[f'{param}_min'] = min_val
            stats[f'{param}_max'] = max_val
            stats[f'{param}_avg'] = avg_val
        
        return stats, cycle_count
    
    def process_float(self, float_dir):
        """Process all files for a single float"""
        float_id = float_dir.name
        print(f"Processing float: {float_id}")
        
        # Initialize float data structure
        float_info = {
            "platform_number": float_id,
            "wmo_inst_type": "",
            "project_name": "",
            "pi_name": "",
            "data_centre": "",
            "launch_info": {
                "date": "",
                "latitude": float('nan'),
                "longitude": float('nan'),
                "platform_type": "",
                "float_serial_no": "",
                "deployment_platform": "",
                "deployment_cruise_id": ""
            },
            "location": "",
            "temp_max": float('nan'),
            "temp_min": float('nan'),
            "temp_avg": float('nan'),
            "psal_max": float('nan'),
            "psal_min": float('nan'),
            "psal_avg": float('nan'),
            "pres_max": float('nan'),
            "pres_min": float('nan'),
            "pres_avg": float('nan'),
            "doxy_max": float('nan'),
            "doxy_min": float('nan'),
            "doxy_avg": float('nan'),
            "fluorescence_chla_max": float('nan'),
            "fluorescence_chla_min": float('nan'),
            "fluorescence_chla_avg": float('nan'),
            "bbp700_max": float('nan'),
            "bbp700_min": float('nan'),
            "bbp700_avg": float('nan'),
            "nitrate_max": float('nan'),
            "nitrate_min": float('nan'),
            "nitrate_avg": float('nan'),
            "ph_max": float('nan'),
            "ph_min": float('nan'),
            "ph_avg": float('nan'),
            "turbidity_max": float('nan'),
            "turbidity_min": float('nan'),
            "turbidity_avg": float('nan'),
            "cdom_max": float('nan'),
            "cdom_min": float('nan'),
            "cdom_avg": float('nan'),
            "technical_info": {
                "battery_type": "",
                "battery_packs": "",
                "controller_board_type_primary": "",
                "firmware_version": "",
                "sensors": []
            },
            "cycles": 0,
            "launch_quality": "",
            "data_source": "",
            "status": "",
            "last_updated": ""
        }
        
        # Process meta file
        meta_file = float_dir / f"{float_id}_meta.nc"
        if meta_file.exists():
            meta_info = self.process_meta_file(meta_file, float_id)
            if meta_info:
                float_info.update(meta_info)
        
        # Process profile files and calculate statistics
        stats, cycle_count = self.process_profile_files(float_dir, float_id)
        float_info.update(stats)
        float_info['cycles'] = cycle_count
        
        return float_info
    
    def process_all_floats(self):
        """Process all float directories"""
        # Find all float directories
        float_dirs = [d for d in self.data_directory.iterdir() 
                     if d.is_dir() and d.name.isdigit()]
        
        print(f"Found {len(float_dirs)} float directories")
        
        for float_dir in float_dirs:
            try:
                float_info = self.process_float(float_dir)
                self.float_data[float_dir.name] = float_info
            except Exception as e:
                print(f"Error processing float {float_dir.name}: {e}")
        
        return self.float_data
    
    def save_json(self, output_file):
        """Save processed data to JSON file"""
        # Convert NaN values to null for JSON serialization
        def convert_nan_to_null(obj):
            if isinstance(obj, dict):
                return {k: convert_nan_to_null(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [convert_nan_to_null(item) for item in obj]
            elif isinstance(obj, float) and np.isnan(obj):
                return None
            else:
                return obj
        
        json_data = convert_nan_to_null(self.float_data)
        
        with open(output_file, 'w') as f:
            json.dump(json_data, f, indent=2)
        
        print(f"Data saved to {output_file}")


def main():
    """Run the pipeline: scan the data directory, write JSON, print a preview.

    Returns early, after printing a hint, when the configured data directory
    does not exist.
    """
    # Set your data directory path here
    data_directory = "/Users/vinithlankireddy/Projects/SIH/example"  # Change this to your actual data path
    output_file = "argo_floats_data.json"

    # Nothing to do without the input directory.
    if not os.path.exists(data_directory):
        print(f"Data directory {data_directory} does not exist!")
        print("Please update the data_directory variable with the correct path.")
        return

    processor = ArgoFloatProcessor(data_directory)

    print("Starting to process Argo float data...")
    float_data = processor.process_all_floats()
    processor.save_json(output_file)
    print(f"Processing complete! Found {len(float_data)} floats.")

    # Preview: only the first three floats are shown as an example.
    preview_limit = 3
    for position, (float_id, data) in enumerate(float_data.items()):
        if position >= preview_limit:
            break
        print(f"\nFloat {float_id}:")
        print(f"  Location: {data['location']}")
        print(f"  Cycles: {data['cycles']}")
        print(f"  Temperature range: {data['temp_min']:.2f} - {data['temp_max']:.2f}")
        print(f"  Salinity range: {data['psal_min']:.2f} - {data['psal_max']:.2f}")


# Run the pipeline only when this code is executed as a script, not on import.
if __name__ == "__main__":
    main()

Starting to process Argo float data...
Found 21 float directories
Processing float: 1902767
Processing float: 1902677
Processing float: 2902114
Processing float: 1902670
Processing float: 2900229
Processing float: 2900228
Processing float: 1902671
Processing float: 2900226
Processing float: 1902676
Processing float: 1900121
Processing float: 5907092
Processing float: 2900232
Processing float: 2900233
Processing float: 1902673
Processing float: 1902674
Processing float: 1902675
Processing float: 1900122
Processing float: 1902672
Processing float: 2900230
Processing float: 1902669
Processing float: 1902785
Data saved to argo_floats_data.json
Processing complete! Found 21 floats.

Float 1902767:
  Location: Indian Ocean
  Cycles: 19
  Temperature range: 2.71 - 17.28
  Salinity range: 34.27 - 35.53

Float 1902677:
  Location: Indian Ocean
  Cycles: 61
  Temperature range: 2.37 - 30.86
  Salinity range: 33.62 - 35.31

Float 2902114:
  Location: Bay of Bengal
  Cycles: 284
  Temperature rang

In [8]:
import os
import json
import numpy as np
import xarray as xr
import pandas as pd
from pathlib import Path
import warnings

# Suppress every warning for the rest of the session.
# NOTE(review): this also hides deprecation and data-decoding warnings from
# the libraries imported above -- confirm that is intended.
warnings.filterwarnings('ignore')

class ArgoFloatProcessor:
    def __init__(self, data_directory):
        self.data_directory = Path(data_directory)
        self.float_data = {}
        
    def determine_location(self, lat, lon):
        """Determine ocean region based on latitude and longitude"""
        if pd.isna(lat) or pd.isna(lon):
            return "Unknown"
        
        # Bay of Bengal: roughly 5-22°N, 80-100°E
        if 5 <= lat <= 22 and 80 <= lon <= 100:
            return "Bay of Bengal"
        
        # Arabian Sea: roughly 0-30°N, 50-80°E
        elif 0 <= lat <= 30 and 50 <= lon <= 80:
            return "Arabian Sea"
        
        # Indian Ocean: broader region
        elif -50 <= lat <= 30 and 20 <= lon <= 120:
            return "Indian Ocean"
        
        else:
            return "Other"
    
    def safe_float_conversion(self, value):
        """Safely convert value to float, return NaN if not possible"""
        try:
            if pd.isna(value):
                return float('nan')
            return float(value)
        except (ValueError, TypeError):
            return float('nan')
    
    def safe_str_conversion(self, value):
        """Safely convert value to string"""
        try:
            if pd.isna(value):
                return ""
            if isinstance(value, bytes):
                return value.decode('utf-8').strip()
            return str(value).strip()
        except:
            return ""
    
    def calculate_stats(self, data):
        """Calculate min, max, avg from data array"""
        try:
            if data is None or len(data) == 0:
                return float('nan'), float('nan'), float('nan')
            
            # Flatten the data if it's multi-dimensional
            flat_data = np.array(data).flatten()
            
            # Remove NaN values
            valid_data = flat_data[~np.isnan(flat_data)]
            
            if len(valid_data) == 0:
                return float('nan'), float('nan'), float('nan')
            
            return float(np.min(valid_data)), float(np.max(valid_data)), float(np.mean(valid_data))
        except:
            return float('nan'), float('nan'), float('nan')
    
    def process_meta_file(self, file_path, float_id):
        """Process meta.nc file"""
        try:
            with xr.open_dataset(file_path) as ds:
                meta_info = {
                    'platform_number': self.safe_str_conversion(ds.PLATFORM_NUMBER.values),
                    'wmo_inst_type': self.safe_str_conversion(ds.WMO_INST_TYPE.values),
                    'project_name': self.safe_str_conversion(ds.PROJECT_NAME.values),
                    'pi_name': self.safe_str_conversion(ds.PI_NAME.values),
                    'data_centre': self.safe_str_conversion(ds.DATA_CENTRE.values),
                    'launch_info': {
                        'date': self.safe_str_conversion(ds.LAUNCH_DATE.values),
                        'latitude': self.safe_float_conversion(ds.LAUNCH_LATITUDE.values),
                        'longitude': self.safe_float_conversion(ds.LAUNCH_LONGITUDE.values),
                        'platform_type': self.safe_str_conversion(ds.PLATFORM_TYPE.values),
                        'float_serial_no': self.safe_str_conversion(ds.FLOAT_SERIAL_NO.values),
                        'deployment_platform': self.safe_str_conversion(ds.DEPLOYMENT_PLATFORM.values) if 'DEPLOYMENT_PLATFORM' in ds else "",
                        'deployment_cruise_id': self.safe_str_conversion(ds.DEPLOYMENT_CRUISE_ID.values) if 'DEPLOYMENT_CRUISE_ID' in ds else ""
                    },
                    'technical_info': {
                        'battery_type': self.safe_str_conversion(ds.BATTERY_TYPE.values) if 'BATTERY_TYPE' in ds else "",
                        'battery_packs': self.safe_str_conversion(ds.BATTERY_PACKS.values) if 'BATTERY_PACKS' in ds else "",
                        'controller_board_type_primary': self.safe_str_conversion(ds.CONTROLLER_BOARD_TYPE_PRIMARY.values) if 'CONTROLLER_BOARD_TYPE_PRIMARY' in ds else "",
                        'firmware_version': self.safe_str_conversion(ds.FIRMWARE_VERSION.values),
                        'sensors': []
                    }
                }
                
                # Extract sensor information
                if 'SENSOR' in ds:
                    sensors = ds.SENSOR.values
                    sensor_makers = ds.SENSOR_MAKER.values if 'SENSOR_MAKER' in ds else []
                    sensor_models = ds.SENSOR_MODEL.values if 'SENSOR_MODEL' in ds else []
                    sensor_serials = ds.SENSOR_SERIAL_NO.values if 'SENSOR_SERIAL_NO' in ds else []
                    
                    for i, sensor in enumerate(sensors):
                        sensor_info = {
                            'name': self.safe_str_conversion(sensor),
                            'maker': self.safe_str_conversion(sensor_makers[i]) if i < len(sensor_makers) else "",
                            'model': self.safe_str_conversion(sensor_models[i]) if i < len(sensor_models) else "",
                            'serial_no': self.safe_str_conversion(sensor_serials[i]) if i < len(sensor_serials) else ""
                        }
                        meta_info['technical_info']['sensors'].append(sensor_info)
                
                # Determine location
                lat = meta_info['launch_info']['latitude']
                lon = meta_info['launch_info']['longitude']
                meta_info['location'] = self.determine_location(lat, lon)
                
                return meta_info
        except Exception as e:
            print(f"Error processing meta file {file_path}: {e}")
            return None
    
    def process_profile_files(self, float_dir, float_id):
        """Process all profile files (prof.nc and cycle files) for a float"""
        all_data = {
            'temp': [], 'psal': [], 'pres': [], 'doxy': [],
            'fluorescence_chla': [], 'bbp700': [], 'nitrate': [],
            'ph': [], 'turbidity': [], 'cdom': []
        }
        max_cycle_number = 0
        
        # Process main profile file
        prof_file = float_dir / f"{float_id}_prof.nc"
        if prof_file.exists():
            try:
                with xr.open_dataset(prof_file) as ds:
                    # Get maximum cycle number from profile file
                    if 'CYCLE_NUMBER' in ds:
                        cycle_nums = ds.CYCLE_NUMBER.values
                        valid_cycles = cycle_nums[~np.isnan(cycle_nums)]
                        if len(valid_cycles) > 0:
                            max_cycle_number = max(max_cycle_number, int(np.max(valid_cycles)))
                    
                    # Extract data for each parameter
                    for param in ['TEMP', 'PSAL', 'PRES']:
                        if param in ds:
                            data = ds[param].values
                            all_data[param.lower()].extend(data.flatten())
            except Exception as e:
                print(f"Error processing profile file {prof_file}: {e}")
        
        # Process cycle files (e.g., *_001.nc, *_002.nc, etc.)
        cycle_files = list(float_dir.glob(f"{float_id}_[0-9][0-9][0-9].nc"))
        
        for cycle_file in cycle_files:
            try:
                with xr.open_dataset(cycle_file) as ds:
                    # Get maximum cycle number from cycle file
                    if 'CYCLE_NUMBER' in ds:
                        cycle_nums = ds.CYCLE_NUMBER.values
                        valid_cycles = cycle_nums[~np.isnan(cycle_nums)]
                        if len(valid_cycles) > 0:
                            max_cycle_number = max(max_cycle_number, int(np.max(valid_cycles)))
                    
                    # Map of parameter names to our keys
                    param_mapping = {
                        'TEMP': 'temp',
                        'PSAL': 'psal', 
                        'PRES': 'pres',
                        'DOXY': 'doxy',
                        'FLUORESCENCE_CHLA': 'fluorescence_chla',
                        'BBP700': 'bbp700',
                        'NITRATE': 'nitrate',
                        'PH_IN_SITU_TOTAL': 'ph',
                        'TURBIDITY': 'turbidity',
                        'CDOM': 'cdom'
                    }
                    
                    for nc_param, key in param_mapping.items():
                        if nc_param in ds:
                            data = ds[nc_param].values
                            all_data[key].extend(data.flatten())
            except Exception as e:
                print(f"Error processing cycle file {cycle_file}: {e}")
        
        # Also check trajectory file for cycle numbers
        traj_file = float_dir / f"{float_id}_Rtraj.nc"
        if traj_file.exists():
            try:
                with xr.open_dataset(traj_file) as ds:
                    if 'CYCLE_NUMBER' in ds:
                        cycle_nums = ds.CYCLE_NUMBER.values
                        valid_cycles = cycle_nums[~np.isnan(cycle_nums)]
                        if len(valid_cycles) > 0:
                            max_cycle_number = max(max_cycle_number, int(np.max(valid_cycles)))
            except Exception as e:
                print(f"Error processing trajectory file {traj_file}: {e}")
        
        # Calculate statistics for each parameter
        stats = {}
        for param, data in all_data.items():
            min_val, max_val, avg_val = self.calculate_stats(data)
            stats[f'{param}_min'] = min_val
            stats[f'{param}_max'] = max_val
            stats[f'{param}_avg'] = avg_val
        
        return stats, max_cycle_number
    
    def process_float(self, float_dir):
        """Process all files for a single float"""
        float_id = float_dir.name
        print(f"Processing float: {float_id}")
        
        # Initialize float data structure
        float_info = {
            "platform_number": float_id,
            "wmo_inst_type": "",
            "project_name": "",
            "pi_name": "",
            "data_centre": "",
            "launch_info": {
                "date": "",
                "latitude": float('nan'),
                "longitude": float('nan'),
                "platform_type": "",
                "float_serial_no": "",
                "deployment_platform": "",
                "deployment_cruise_id": ""
            },
            "location": "",
            "temp_max": float('nan'),
            "temp_min": float('nan'),
            "temp_avg": float('nan'),
            "psal_max": float('nan'),
            "psal_min": float('nan'),
            "psal_avg": float('nan'),
            "pres_max": float('nan'),
            "pres_min": float('nan'),
            "pres_avg": float('nan'),
            "doxy_max": float('nan'),
            "doxy_min": float('nan'),
            "doxy_avg": float('nan'),
            "fluorescence_chla_max": float('nan'),
            "fluorescence_chla_min": float('nan'),
            "fluorescence_chla_avg": float('nan'),
            "bbp700_max": float('nan'),
            "bbp700_min": float('nan'),
            "bbp700_avg": float('nan'),
            "nitrate_max": float('nan'),
            "nitrate_min": float('nan'),
            "nitrate_avg": float('nan'),
            "ph_max": float('nan'),
            "ph_min": float('nan'),
            "ph_avg": float('nan'),
            "turbidity_max": float('nan'),
            "turbidity_min": float('nan'),
            "turbidity_avg": float('nan'),
            "cdom_max": float('nan'),
            "cdom_min": float('nan'),
            "cdom_avg": float('nan'),
            "technical_info": {
                "battery_type": "",
                "battery_packs": "",
                "controller_board_type_primary": "",
                "firmware_version": "",
                "sensors": []
            },
            "cycles": 0,
            "launch_quality": "",
            "data_source": "",
            "status": "",
            "last_updated": ""
        }
        
        # Process meta file
        meta_file = float_dir / f"{float_id}_meta.nc"
        if meta_file.exists():
            meta_info = self.process_meta_file(meta_file, float_id)
            if meta_info:
                float_info.update(meta_info)
        
        # Process profile files and calculate statistics
        stats, max_cycle_number = self.process_profile_files(float_dir, float_id)
        float_info.update(stats)
        float_info['cycles'] = max_cycle_number
        
        return float_info
    
    def process_all_floats(self):
        """Process all float directories"""
        # Find all float directories
        float_dirs = [d for d in self.data_directory.iterdir() 
                     if d.is_dir() and d.name.isdigit()]
        
        print(f"Found {len(float_dirs)} float directories")
        
        for float_dir in float_dirs:
            try:
                float_info = self.process_float(float_dir)
                self.float_data[float_dir.name] = float_info
            except Exception as e:
                print(f"Error processing float {float_dir.name}: {e}")
        
        return self.float_data
    
    def save_json(self, output_file):
        """Save processed data to JSON file"""
        # Convert NaN values to null for JSON serialization
        def convert_nan_to_null(obj):
            if isinstance(obj, dict):
                return {k: convert_nan_to_null(v) for k, v in obj.items()}
            elif isinstance(obj, list):
                return [convert_nan_to_null(item) for item in obj]
            elif isinstance(obj, float) and np.isnan(obj):
                return None
            else:
                return obj
        
        json_data = convert_nan_to_null(self.float_data)
        
        with open(output_file, 'w') as f:
            json.dump(json_data, f, indent=2)
        
        print(f"Data saved to {output_file}")


def main():
    """Run the pipeline: scan the data directory, write JSON, print a preview.

    Returns early, after printing a hint, when the configured data directory
    does not exist.
    """
    # Set your data directory path here
    data_directory = "/Users/vinithlankireddy/Projects/SIH/example"  # Change this to your actual data path
    output_file = "argo_floats_data2.json"

    # Nothing to do without the input directory.
    if not os.path.exists(data_directory):
        print(f"Data directory {data_directory} does not exist!")
        print("Please update the data_directory variable with the correct path.")
        return

    processor = ArgoFloatProcessor(data_directory)

    print("Starting to process Argo float data...")
    float_data = processor.process_all_floats()
    processor.save_json(output_file)
    print(f"Processing complete! Found {len(float_data)} floats.")

    # Preview: only the first three floats are shown as an example.
    preview_limit = 3
    for position, (float_id, data) in enumerate(float_data.items()):
        if position >= preview_limit:
            break
        print(f"\nFloat {float_id}:")
        print(f"  Location: {data['location']}")
        print(f"  Cycles: {data['cycles']}")
        print(f"  Temperature range: {data['temp_min']:.2f} - {data['temp_max']:.2f}")
        print(f"  Salinity range: {data['psal_min']:.2f} - {data['psal_max']:.2f}")


# Run the pipeline only when this code is executed as a script, not on import.
if __name__ == "__main__":
    main()

Starting to process Argo float data...
Found 21 float directories
Processing float: 1902767
Processing float: 1902677
Processing float: 2902114
Processing float: 1902670
Processing float: 2900229
Processing float: 2900228
Processing float: 1902671
Processing float: 2900226
Processing float: 1902676
Processing float: 1900121
Processing float: 5907092
Processing float: 2900232
Processing float: 2900233
Processing float: 1902673
Processing float: 1902674
Processing float: 1902675
Processing float: 1900122
Processing float: 1902672
Processing float: 2900230
Processing float: 1902669
Processing float: 1902785
Data saved to argo_floats_data2.json
Processing complete! Found 21 floats.

Float 1902767:
  Location: Indian Ocean
  Cycles: 19
  Temperature range: 2.71 - 17.28
  Salinity range: 34.27 - 35.53

Float 1902677:
  Location: Indian Ocean
  Cycles: 61
  Temperature range: 2.37 - 30.86
  Salinity range: 33.62 - 35.31

Float 2902114:
  Location: Bay of Bengal
  Cycles: 284
  Temperature ran

In [None]:
import json
import xarray as xr
from pathlib import Path

def _end_date_is_blank(raw):
    """Return True when a raw END_MISSION_DATE value contains no date text."""
    # Scalar text usually arrives wrapped in a single-element NumPy array;
    # unwrap it so the bytes/str payload can be inspected directly instead of
    # comparing against its repr.
    if getattr(raw, "size", None) == 1 and hasattr(raw, "item"):
        raw = raw.item()
    if isinstance(raw, bytes):
        raw = raw.decode("utf-8", errors="replace")
    return isinstance(raw, str) and raw.replace("\x00", "").strip() == ""


def update_status(json_file, data_dir, output_file=None):
    """Set the ``status`` field of every float in a JSON file.

    For each float id in the JSON, the first ``<data_dir>/<float_id>/*_meta.nc``
    file is opened. The float is marked "active" when END_MISSION_DATE exists
    and is blank (only whitespace, whatever its width or bytes/str type), and
    "inactive" in every other case: a non-blank value, a missing variable, a
    missing meta file, or an error while reading.

    Args:
        json_file: Path of the JSON file produced earlier (float id -> record).
        data_dir: Directory containing one sub-directory per float.
        output_file: Where to write the result; when omitted (or falsy) the
            input ``json_file`` is overwritten in place.
    """
    json_file = Path(json_file)
    data_dir = Path(data_dir)

    # Load JSON
    with open(json_file, "r") as f:
        float_data = json.load(f)

    for float_id, info in float_data.items():
        meta_file = next(data_dir.glob(f"{float_id}/*_meta.nc"), None)

        if meta_file is None or not meta_file.exists():
            print(f"No meta file found for float {float_id}")
            info["status"] = "inactive"
            continue

        try:
            # The context manager closes the dataset even when reading fails.
            with xr.open_dataset(meta_file) as ds:
                end_mission_date = ds.get("END_MISSION_DATE")
                still_running = (
                    end_mission_date is not None
                    and _end_date_is_blank(end_mission_date.values)
                )
            info["status"] = "active" if still_running else "inactive"
        except Exception as e:
            print(f"Error reading {meta_file}: {e}")
            info["status"] = "inactive"

    # Save updated JSON
    out_path = output_file if output_file else json_file
    with open(out_path, "w") as f:
        json.dump(float_data, f, indent=2)

    print(f"✅ Updated JSON saved to {out_path}")




# Refresh the "status" field of the JSON written earlier. No output_file is
# passed, so the input JSON is overwritten in place.
update_status("/Users/vinithlankireddy/Projects/SIH/argo_floats_data2.json", "/Users/vinithlankireddy/Projects/SIH/example")



✅ Updated JSON saved to /Users/vinithlankireddy/Projects/SIH/argo_floats_data2.json
