In [1]:
# One-time environment setup, kept commented out so that "Run All" does not
# re-create or modify the conda environment on every execution.
# Uncomment to create an environment named `rna`, activate it, and install the
# packages listed in ../docs/requirements.txt.
# NOTE(review): activating an environment from inside an already-running kernel
# presumably does not switch the kernel's own environment — confirm; it may be
# safer to run these steps in a terminal before launching Jupyter.
#%conda create --name rna
#%conda activate rna
#%conda install --yes --file ../docs/requirements.txt


Note: you may need to restart the kernel to use updated packages.


In [None]:
# Snapshot the packages of the current conda environment into
# ../docs/requirements.txt (the `>` redirect overwrites the existing file,
# which is the same file the commented-out install step reads from).
%conda list --export > ../docs/requirements.txt

In [1]:
import pathlib
import requests
import scanpy as sc

In [2]:
# Location of the data directory: a `data` folder that is a sibling of the
# current working directory (i.e. <cwd>/../data).
DATA_DIR = pathlib.Path.cwd().parent.joinpath('data')
print(DATA_DIR)

/home/victor/csbiology/single_cell_rna_seq_analysis/data


In [None]:
# Ensure the data directory is present: missing parent folders are created,
# and an already-existing directory is not treated as an error.
DATA_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# GEO download endpoint for accession GSE171524 (format=file returns the
# archive of raw supplementary files).
raw_data_address = (
    "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE171524&format=file"
)

In [None]:
# Download the GEO tar archive to DATA_DIR / 'raw_data.tar'.
#
# - The download is skipped when the archive is already on disk, so that
#   "Restart & Run All" does not re-fetch it. Delete the file to force a
#   fresh download.
# - The body is streamed to disk in chunks instead of being held in memory
#   as a whole (`response.content` would buffer the entire archive).
# - A (connect, read) timeout keeps a stalled connection from hanging the
#   kernel indefinitely. The read timeout applies between received chunks,
#   not to the download as a whole.
# - Data is first written to a '.part' file and only renamed once the
#   transfer has finished, so an interrupted download never leaves a
#   truncated 'raw_data.tar' that would later be mistaken for a complete one.
#
# Later cells read from DATA_DIR / 'raw_data', so the archive still has to be
# extracted there manually after this cell has run.
tar_path = DATA_DIR / 'raw_data.tar'
partial_path = tar_path.with_name(tar_path.name + '.part')
CHUNK_SIZE = 1024 * 1024  # bytes written per iteration (1 MiB)

if tar_path.exists():
    print(f"Found existing archive at {tar_path}; skipping download.")
    print("Open the directory and extract the tar file.")
else:
    # The context manager releases the connection even if writing fails.
    with requests.get(raw_data_address, stream=True, timeout=(10, 60)) as response:
        # Check if the request was successful (status code 200)
        if response.status_code == 200:
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                    f.write(chunk)
            # Publish the finished file under its final name.
            partial_path.replace(tar_path)
            print("Downloaded the tar file successfully!")
            print("Open the directory and extract the tar file.")
        else:
            print(f"Failed to download the tar file. Status code: {response.status_code}")


In [4]:
# Gzipped raw-count CSV of a single sample inside the extracted archive folder.
SAMPLE_RAW_DATA_PATH = DATA_DIR.joinpath(
    'raw_data', 'GSM5226574_C51ctr_raw_counts.csv.gz'
)

In [5]:
# Read the sample's count table and transpose it so that cell barcodes become
# observations (rows) and genes become variables (columns).
sample_data = sc.read_csv(SAMPLE_RAW_DATA_PATH).transpose()
sample_data

AnnData object with n_obs × n_vars = 6099 × 34546

In [6]:
# Per-observation annotation table; its index entries look like cell barcodes.
sample_data.obs

TAGGTACCATGGCCAC-1_1
ATTCACTGTAACAGGC-1_1
TAACTTCCAACCACGC-1_1
TTGGGTACACGACAAG-1_1
AGGCCACAGAGTCACG-1_1
...
CGCCATTGTTTGCCGG-1_1
CACTGGGGTCTACGTA-1_1
CATACTTGTAGAGGAA-1_1
TTTGGTTTCCACGGAC-1_1
ATGCATGAGTCATGAA-1_1


In [7]:
# Per-variable annotation table; its index entries look like gene identifiers.
sample_data.var

AL627309.1
AL627309.5
AL627309.4
AL669831.2
LINC01409
...
VN1R2
AL031676.1
SMIM34A
AL050402.1
AL445072.1


In [8]:
# The data matrix itself (observations x variables), displayed via its array repr.
sample_data.X

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [9]:
# Dimensions of the data matrix as (n_obs, n_vars); should agree with the
# AnnData summary shown when the sample was loaded.
sample_data.X.shape

(6099, 34546)