# PhysioNet

## Access data

In [None]:
import os
import requests

# Remote base folder, the patient under study, and one sample recording of that patient
patient = "0284"
url = "https://physionet.org/files/i-care/2.0/training/"
filename = f"{patient}_002_004_ECG.mat"

In [8]:
import pandas as pd

records_url = "https://physionet.org/files/i-care/2.0/training/RECORDS"

# Read the one-column RECORDS index, then derive the bare patient id by
# stripping the 'training' prefix and the slashes from each path.
df = (
    pd.read_csv(records_url, header=None, names=['path'])
    .assign(
        patient=lambda frame: frame['path'].str.replace('training', '').str.replace('/', '')
    )
)

df

Unnamed: 0,path,patient
0,training/0284/,0284
1,training/0286/,0286
2,training/0296/,0296
3,training/0299/,0299
4,training/0303/,0303
...,...,...
602,training/1016/,1016
603,training/1017/,1017
604,training/1018/,1018
605,training/1019/,1019


In [11]:
import requests

# Fetch the listing page of the selected patient's folder (url + patient),
# e.g. https://physionet.org/files/i-care/2.0/training/0284 for patient "0284".
# The timeout keeps the cell from hanging indefinitely on a stalled connection.
response = requests.get(url+patient, timeout=60)

# Check if request was successful
if response.status_code == 200:
    # Use a dedicated name for the output path: reusing `filename` would
    # overwrite the .mat file name that the single-file download cell reads.
    html_path = f"training/{patient}/{patient}.html"

    # The target folder may not exist yet on a fresh kernel.
    os.makedirs(os.path.dirname(html_path), exist_ok=True)

    # Open file in write mode and save the content
    with open(html_path, 'w', encoding='utf-8') as file:
        file.write(response.text)

    print(f"HTML content has been saved to {html_path}")
else:
    print("Failed to retrieve HTML content.")


HTML content has been saved to training/0284/0284.html


In [12]:
# Patient id stored in the row labelled 0 of the RECORDS table
df.loc[0, 'patient']

'0284'

In [16]:
# Per-patient RECORDS index: one record name per line, no header row
patient_records_url = f"{url}{patient}/RECORDS"
df2 = pd.read_csv(patient_records_url, header=None, names=['file'])
df2

Unnamed: 0,file
0,0284_001_004_ECG
1,0284_001_004_EEG
2,0284_001_004_OTHER
3,0284_002_005_ECG
4,0284_002_005_EEG
...,...
250,0284_084_073_EEG
251,0284_084_073_OTHER
252,0284_085_074_ECG
253,0284_085_074_EEG


In [17]:
# Download the .hea and .mat file of every record listed in df2.

# Resolve and create the local target folder here, so this cell does not rely
# on a later cell having defined `directory` (NameError on a fresh kernel).
directory = os.path.join("training", patient)
os.makedirs(directory, exist_ok=True)

# Only the 'file' column is needed, so iterate over it directly instead of iterrows().
for record in df2['file']:
    # Extract file name without extension
    filename_without_ext = os.path.splitext(record)[0]

    # For each file entry, construct URLs and download both .hea and .mat files
    for ext in ['.hea', '.mat']:
        # Construct the file URL
        file_url = url + patient + "/" + filename_without_ext + ext

        # Send a HTTP request to the server; the timeout prevents one stalled
        # transfer from blocking the whole loop forever.
        response = requests.get(file_url, timeout=60)

        # If request was successful, save the file
        if response.status_code == 200:
            # Full file path
            file_path = os.path.join(directory, filename_without_ext + ext)

            # Write content to local file
            with open(file_path, 'wb') as f:
                f.write(response.content)

            print(f"{filename_without_ext + ext} has been downloaded to {file_path}")
        else:
            print(f"Failed to download {filename_without_ext + ext} from {file_url}")

0284_001_004_ECG.hea has been downloaded to training\0284\0284_001_004_ECG.hea
0284_001_004_ECG.mat has been downloaded to training\0284\0284_001_004_ECG.mat
0284_001_004_EEG.hea has been downloaded to training\0284\0284_001_004_EEG.hea
0284_001_004_EEG.mat has been downloaded to training\0284\0284_001_004_EEG.mat
0284_001_004_OTHER.hea has been downloaded to training\0284\0284_001_004_OTHER.hea
0284_001_004_OTHER.mat has been downloaded to training\0284\0284_001_004_OTHER.mat
0284_002_005_ECG.hea has been downloaded to training\0284\0284_002_005_ECG.hea
0284_002_005_ECG.mat has been downloaded to training\0284\0284_002_005_ECG.mat
0284_002_005_EEG.hea has been downloaded to training\0284\0284_002_005_EEG.hea
0284_002_005_EEG.mat has been downloaded to training\0284\0284_002_005_EEG.mat
0284_002_005_OTHER.hea has been downloaded to training\0284\0284_002_005_OTHER.hea
0284_002_005_OTHER.mat has been downloaded to training\0284\0284_002_005_OTHER.mat
0284_003_006_ECG.hea has been downlo

In [2]:
# Download one file (`filename`) of the selected patient into training/<patient>/.

# Create the directory if it does not exist; exist_ok replaces the separate
# existence check and is safe to re-run.
directory = os.path.join("training", patient)
os.makedirs(directory, exist_ok=True)

# Full file path
file_path = os.path.join(directory, filename)

# Full URL
full_url = url + patient + "/" + filename

# Download the file (the timeout avoids hanging on a stalled connection)
response = requests.get(full_url, timeout=60)

# Save the file only on success: writing unconditionally would store an HTTP
# error page under the .mat name and still report a successful download.
if response.status_code == 200:
    with open(file_path, 'wb') as f:
        f.write(response.content)

    print(f"{filename} has been downloaded to {file_path}")
else:
    print(f"Failed to download {filename} from {full_url}")

0284_002_004_ECG.mat has been downloaded to training\0284\0284_002_004_ECG.mat


## Gcloud

Data:
- https://console.cloud.google.com/storage/browser/i-care-2.0.physionet.org

Resources:
- https://cloud.google.com/appengine/docs/legacy/standard/python/googlecloudstorageclient/read-write-to-cloud-storage
- https://cloud.google.com/storage/docs/gsutil_install#windows

In [None]:
from google.cloud import storage

def write_read(bucket_name, blob_name):
    """Round-trip a short text through a GCS object using file-like IO.

    Writes the text "Hello world" to the object, then reads it back and
    prints what was read.

    Args:
        bucket_name: ID of the GCS bucket, e.g. "your-bucket-name".
        blob_name: ID of the GCS object, e.g. "storage-object-name".
    """
    client = storage.Client()
    target = client.bucket(bucket_name).blob(blob_name)

    # Text mode is used here; "wb"/"rb" would select bytes mode instead.
    # See: https://docs.python.org/3/library/io.html
    with target.open("w") as writer:
        writer.write("Hello world")

    with target.open("r") as reader:
        print(reader.read())

In [None]:
%gsutil -m -u YOUR_PROJECT_ID cp -r gs://i-care-2.0.physionet.org DESTINATION

## AWS

In [None]:
import boto3

# Print the key of every object stored in the 'physionet2023' bucket
s3 = boto3.resource('s3')
bucket = s3.Bucket('physionet2023')

for s3_object in bucket.objects.all():
    print(s3_object.key)

In [18]:
import boto3
# Upload the patient's local RECORDS file to S3, using the local relative
# path as the object key.
s3_client = boto3.client('s3')
s3_bucket = "physionet2023"
patient = "0284"
base_url = "https://physionet.org/files/i-care/2.0/"  # not used in this cell
file_name = f"training/{patient}/RECORDS"
object_name = file_name
s3_client.upload_file(Filename=file_name, Bucket=s3_bucket, Key=object_name)

In [21]:
import boto3
# Upload the patient's local <patient>.txt file to S3, using the local
# relative path as the object key.
s3_client = boto3.client('s3')
s3_bucket = "physionet2023"
patient = "0284"
base_url = "https://physionet.org/files/i-care/2.0/"  # not used in this cell
file_name = f"training/{patient}/{patient}.txt"
object_name = file_name
s3_client.upload_file(Filename=file_name, Bucket=s3_bucket, Key=object_name)

In [20]:
import os
import pandas as pd
import boto3

# S3 client and destination bucket
s3 = boto3.client('s3')
bucket_name = 'physionet2023'

# Remote base folder and the patient whose files are uploaded
url = "https://physionet.org/files/i-care/2.0/training/"
patient = "0284"

# Record names come from the patient's RECORDS index (one name per line)
df2 = pd.read_csv(f"{url}{patient}/RECORDS", header=None, names=['file'])

# Folder holding the previously downloaded files
directory = os.path.join("training", patient)

# Upload the .hea and .mat file of every record, skipping files missing on disk
for record_name in df2['file']:
    filename_without_ext = os.path.splitext(record_name)[0]

    for ext in ('.hea', '.mat'):
        file_path = os.path.join(directory, filename_without_ext + ext)

        # Nothing to upload when the file was never downloaded
        if not os.path.exists(file_path):
            print(f"{file_path} does not exist locally")
            continue

        # Object keys always use forward slashes, independent of the local OS separator
        s3_object_key = f"training/{patient}/{filename_without_ext + ext}"

        # Report a failed upload and carry on with the remaining files
        try:
            s3.upload_file(file_path, bucket_name, s3_object_key)
            print(f"Uploaded {file_path} to {bucket_name}/{s3_object_key}")
        except Exception as e:
            print(f"Failed to upload {file_path} to {bucket_name}/{s3_object_key}. Error: {str(e)}")

Uploaded training\0284\0284_001_004_ECG.hea to physionet2023/training/0284/0284_001_004_ECG.hea
Uploaded training\0284\0284_001_004_ECG.mat to physionet2023/training/0284/0284_001_004_ECG.mat
Uploaded training\0284\0284_001_004_EEG.hea to physionet2023/training/0284/0284_001_004_EEG.hea
Uploaded training\0284\0284_001_004_EEG.mat to physionet2023/training/0284/0284_001_004_EEG.mat
Uploaded training\0284\0284_001_004_OTHER.hea to physionet2023/training/0284/0284_001_004_OTHER.hea
Uploaded training\0284\0284_001_004_OTHER.mat to physionet2023/training/0284/0284_001_004_OTHER.mat
Uploaded training\0284\0284_002_005_ECG.hea to physionet2023/training/0284/0284_002_005_ECG.hea
Uploaded training\0284\0284_002_005_ECG.mat to physionet2023/training/0284/0284_002_005_ECG.mat
Uploaded training\0284\0284_002_005_EEG.hea to physionet2023/training/0284/0284_002_005_EEG.hea
Uploaded training\0284\0284_002_005_EEG.mat to physionet2023/training/0284/0284_002_005_EEG.mat
Uploaded training\0284\0284_002_