# Practicals for lecture 1.3

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vigji/python-cimec/blob/main/practicals/Practicals_1.3.ipynb)

## More on `pandas`

In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

#### 1.3.0 Organize a dataframe

In [None]:
# Let's have a look into how to organize data in a dataframe.

# Take the following function that generates results for many subjects on
# an experiment with experimental trials of different difficulty levels.

def get_experiment_block_data(n_subjects=50, difficulty_levels=(1, 2, 3, 4, 5), n_repetitions=50, seed=42):
    """Generate simulated results from an experiment with trials of varying difficulty.

    Every subject gets a random integer "ability" between 1 and 5. For each
    trial a difficulty level is sampled from ``difficulty_levels``; reaction
    times and errors both scale with ``difficulty / ability``.

    Note that this function reseeds NumPy's *global* random number generator.

    Parameters
    ----------
    n_subjects : int
        Number of subjects.
    difficulty_levels : tuple
        Difficulty levels that trials are sampled from (uniformly, with
        replacement).
    n_repetitions : int
        Number of trials generated for each subject.
    seed : int or None
        Seed passed to ``np.random.seed``. The default (42) reproduces the
        same data on every call; ``None`` lets NumPy pick a fresh seed, so
        results are no longer reproducible.

    Returns
    -------
    dict
        Maps ``"subject_<i>"`` to a dictionary with three arrays of length
        ``n_repetitions``: ``"difficulty_level"``, ``"rt"`` and ``"error"``.

    """
    np.random.seed(seed)

    subject_dict = {}
    for subject in range(n_subjects):
        # The order of the random draws below matters: changing it would
        # change the generated data for a given seed.
        subject_ability = np.random.randint(1, 6)  # upper bound is exclusive
        difficulty_level_arr = np.random.choice(difficulty_levels, size=n_repetitions)

        # Reaction time: gaussian around 1000 (sd 100), scaled by difficulty / ability.
        rt = np.random.normal(1000, 100, size=n_repetitions) * difficulty_level_arr / subject_ability
        # Error: uniform between 0 and a per-trial upper bound of 1000 * difficulty / ability.
        error = np.random.uniform(0, 1000 * difficulty_level_arr / subject_ability, size=n_repetitions)

        subject_dict[f"subject_{subject}"] = dict(
            difficulty_level=difficulty_level_arr,
            rt=rt,
            error=error,
        )

    return subject_dict


# Run the function (with its default arguments) to generate the data dictionary.
# Every entry of the dictionary is one subject, keyed "subject_<i>", and holds
# three arrays with one value per trial: "difficulty_level", "rt" (reaction
# time) and "error":
data = get_experiment_block_data()

# Convert the data to a dataframe:



In [None]:
# Build one dataframe per subject (one row per trial) and tag every row with
# the subject's name, so the rows stay identifiable after stacking.
dataframes_list = [
    pd.DataFrame(trial_arrays).assign(subject=subject_name)
    for subject_name, trial_arrays in data.items()
]

# Stack all subjects into one long table. reset_index() gives every row a
# unique label and keeps the old per-subject row number in an "index" column.
trials_df = pd.concat(dataframes_list).reset_index()
trials_df

In [None]:
# Keep only the trials of subject 0, then scatter-plot reaction time against
# trial difficulty level.
subject_df = trials_df.loc[trials_df["subject"] == "subject_0"]

# Pure-matplotlib alternative to the pandas plotting call below:
#   plt.figure()
#   plt.scatter(subject_df["difficulty_level"], subject_df["rt"])
subject_df.plot.scatter(x="difficulty_level", y="rt")

In [None]:
# IPython/Jupyter help syntax (not valid in a plain Python script):
# shows the documentation of pd.read_csv.
?pd.read_csv

In [None]:
# Load the subjects dataframe directly from the csv file hosted at this url.
# Note: this rebinds `subject_df`, which from here on holds the subjects table.
subjects_csv_url = "https://raw.githubusercontent.com/vigji/python-cimec/main/practicals/data/subjects_df.csv"

# index_col=0: use the first column of the csv as the row index.
subject_df = pd.read_csv(subjects_csv_url, index_col=0)
subject_df

In [None]:
# Now use boolean indexing on the subject dataframe to include only
# left-handed males above 30 years in the analysis.
# Each criterion is a boolean mask with one entry per subject:
is_left_handed = subject_df["handedness"] == "left"
is_male = subject_df["sex"] == "M"
is_above_30 = subject_df["age"] > 30

# A subject is included only if all three criteria hold.
selector = is_left_handed & is_male & is_above_30

# Row labels (the dataframe index) of the subjects that passed the selection.
included_subjects = subject_df.loc[selector].index


In [None]:
# Keep only the trials that belong to the selected subjects, and plot the
# reaction time as a function of the trial difficulty level for them.
# NOTE(review): assumes the labels in `included_subjects` match the values of
# the "subject" column of `trials_df` - confirm against the csv.
is_included_trial = trials_df["subject"].isin(included_subjects)
included_subjects_df = trials_df[is_included_trial]

included_subjects_df.plot.scatter(x="difficulty_level", y="rt")