# Practicals for lecture 1.1

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vigji/python-cimec/blob/main/practicals/Practicals_1.1.ipynb)

## More on `numpy`

In [None]:
import numpy as np
from matplotlib import pyplot as plt

#### 1.1.0 Stats over numpy arrays

In [None]:
# Exercise: draw a vector of 1000 normally distributed numbers (mean 10, standard
# deviation 3) with np.random.normal, then measure the mean and standard deviation
# that the sampled array actually has.
arr = np.random.normal(loc=10, scale=3, size=1000)

# Empirical statistics of the sample: close to, but not exactly, 10 and 3.
arr.mean(), arr.std()

In [None]:
import requests
import json

def download_meteo_data(start_date="2022-01-01", end_date="2022-12-31",
                        latitude="45.88204", longitude="11.03647",
                        data="temperature_2m"):
    """Download meteo historical data from open-meteo.com.

    Parameters
    ----------
        start_date : str
            Beginning of time series (e.g. "2022-01-01").

        end_date : str
            End of time series (e.g. "2022-12-31").

        latitude : str or float
            Latitude of the time series.

        longitude : str or float
            Longitude of the time series.

        data : str
            Data to download. One of "temperature_2m", "relativehumidity_2m",
            "precipitation", "snowfall", "windspeed_10m".

    Returns
    -------
        np.array or None
            1D array of timestamps. None if the response contains no
            "hourly" entry (the response is printed in that case).
        np.array or None
            1D array of data, sampled every hour (24 points per day).
            None if the response contains no "hourly" entry.

    Raises
    ------
        requests.exceptions.RequestException
            If the server cannot be reached or does not answer in time.

    """
    BASE_URL = "https://archive-api.open-meteo.com/v1/"

    # Let requests assemble and URL-encode the query string instead of
    # pasting the raw values into the URL by hand.
    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": data,
    }

    # Without a timeout an unresponsive server would block the call forever.
    r = requests.get(BASE_URL + "archive", params=params, timeout=30)
    json_dict = json.loads(r.text)

    # When "hourly" is missing, show the response so the user can see why
    # the request failed.
    if "hourly" not in json_dict:
        print(json_dict)
        return None, None

    # Return a real tuple (not a one-shot generator), consistent with the
    # failure branch above and with the documented return value.
    hourly = json_dict["hourly"]
    return np.array(hourly["time"]), np.array(hourly[data])


# Called with the defaults: hourly "temperature_2m" from 2022-01-01 to 2022-12-31
# at latitude 45.88204, longitude 11.03647. Both values are None if the response
# had no "hourly" entry.
tststamps_array, temp_array = download_meteo_data()

In [None]:
# Inspect the shape of the downloaded data (presumably one entry per hour of the
# requested period, i.e. 365 * 24 values for the default 2022 range - check the output).
temp_array.shape

In [None]:
# Run the cell above to download an array of temperatures in Rovereto during 2022.
# Temperatures data are sampled every hour.

# Reshape the array to be a matrix of shape (n_days, n_hours).
n_days = 365
n_hours = 24

temp_mat = np.reshape(temp_array, (n_days, n_hours))

# Plot it with plt.matshow() (or plt.imshow()) to check if it makes sense.
# Change the colormap to be divergent around 0.
# A diverging colormap is only centred on 0 if the colour limits are symmetric,
# so use the largest absolute temperature for both ends of the scale.
t_lim = np.max(np.abs(temp_mat))
plt.imshow(temp_mat, aspect="auto", cmap="RdBu_r", vmin=-t_lim, vmax=t_lim)

# pyplot labels the current axes with plt.xlabel()/plt.ylabel(); assigning to
# plt.set_xlabel / plt.set_ylabel only creates unused attributes on the module.
plt.xlabel("Hour")
plt.ylabel("Day")
plt.colorbar(label="T (°C)")

In [None]:
# Exercise: show the temperature course of every day with plt.plot, one line per day.
# An explicit loop over the rows (plt.plot(temp_mat[day_idx, :]) for each day_idx)
# works, but plt.plot draws one line per column of a 2D input, so handing it the
# transposed (n_hours, n_days) matrix does the same in a single call.
# The color "0.6" is a gray level, as requested for the individual days.
plt.plot(np.transpose(temp_mat), color="0.6")

# Exercise: compute the average temperature profile over days and draw it in red
# on top of the gray single-day lines.
# Averaging along axis 0 collapses the days and leaves one value per hour.
avg_daily_temps = temp_mat.mean(axis=0)
plt.plot(avg_daily_temps, color="r")


In [None]:
# Exercise: build one-dimensional arrays holding the minimum, mean and maximum
# temperature of each day.
# Reducing along axis 1 collapses the hours and leaves one value per day.
min_t = temp_mat.min(axis=1)
max_t = temp_mat.max(axis=1)
mean_t = temp_mat.mean(axis=1)

# Exercise: read the plt.fill_between() documentation and use it to draw the
# temperature range of every day of the year.
# The shaded band spans min to max of each day; the red line is the daily mean.
x_array = np.arange(n_days)
plt.fill_between(x_array, y1=min_t, y2=max_t)
plt.plot(x_array, mean_t, color="r")

In [None]:
# Exercise: same plot as before, but the band now covers the 25th-75th
# percentile range of every day instead of the full min-max range.

# Asking for both percentiles at once returns a (2, n_days) array, whose two
# rows unpack directly into the lower and upper bound of the band.
perc25_t, perc75_t = np.percentile(temp_mat, [25, 75], axis=1)

# Shaded band: interquartile range of each day; red line: daily mean
# (mean_t comes from the previous cell).
x_array = np.arange(n_days)
plt.fill_between(x_array, y1=perc25_t, y2=perc75_t)
plt.plot(x_array, mean_t, color="r")

In [None]:
# Are Murphy's laws true? Does it rain more on weekends?

# Read the docs of download_meteo_data(), and use it to download precipitation data from 2022.
# Tip: change the end_date argument to be end_date="2023-01-02" to have a multiple of 7 days!
# Tip2: check out a calendar to see which weekday the array will start from.

# Reshape the matrix as we did before, and compute cumulative (or average) precipitations per day.


In [None]:
# Then, reshape the daily averages array to be of shape (n_weeks, n_weekdays)

# Finally, take the average over the n_weeks dimension and plot the average precipitation for each weekday!
# Bonus points: represent the dispersion of the data (std or percentiles) using plt.fill_between().


#### 1.1.1 Vectorizations and indexing

In [None]:
# Let's have a look at daily excursions instead of absolute temperatures!

# Write a function that takes as input a matrix and subtracts from each row 
# the minimum value of that row, in a loop. 
# Make sure you do not change the original matrix when running the function!

# Now, write a second function that does the same in a single vector operation:


# Then, test it over the temperature data matrix. Use plt.matshow to visualize it before and after
# the offset subtraction. 
# Tip: you can use plt.subplots() to show multiple plots next to each other


In [None]:
# We can use np.argsort() to sort a whole array based on the values of another array!

# For a full ranking of the most rainy days of 2022, sort
# the timestamps array based on the sorting of precipitation array. 
# Make sure the first element matches the result that you have got with np.argmax!


In [None]:
# Spike detection (optional!)

# Run the function below to generate a synthetic extracellular
# recording for a neuron. Plot the trace; can you see the spikes?

# Write a function to detect spikes! Think about a good strategy to do this
# before starting.
# The function should take the trace as input, and return the index of each spike
# as the output.
# Make sure you do not get more than one index for each spike!

# Pro challenge: if you can, try not to write any loop.

# Then, write a crop_event function that takes as inputs:
#    - the recording array
#    - the spike indexes
#    - a n_points variable specifying the number of points to crop before and after the spike
#
# And returns a (n_spikes, n_points*2) matrix of spike events cropped out of the recording!
# Plot the matrix you get. If you want you can try to normalize it with the function you wrote for
# the daily temperatures excursion exercise!


In [None]:
def generate_spike_trace(trace_length=60, firing_rate=1, noise_sigma=0.03, seed=42):
    """Function to generate a fake extracellular recording.

    The trace is a train of identical spike waveforms placed at random
    (Poisson) times, plus additive Gaussian noise.

    Parameters
    ----------
        trace_length : float
            Duration of the recording in seconds.

        firing_rate : float
            Average firing rate of the neuron in Hz.

        noise_sigma : float
            Noise level (standard deviation of the additive Gaussian noise).

        seed : int or None
            Seed of the random number generator. The default (42) always
            produces the same trace; pass None for a different trace at
            every call.

    Returns
    -------
        np.array
            Fake recording trace, a 1D array of int(trace_length * 10000)
            samples (the sampling frequency is fixed at 10 kHz).

    """
    # A private RandomState yields the same numbers as np.random.seed(seed)
    # followed by np.random.* calls, but leaves the global numpy random
    # state of the caller untouched.
    rng = np.random.RandomState(seed)
    FS = 10000  # sampling frequency
    n = int(trace_length * FS)  # number of samples

    # Generate spike shape template as a difference of Gaussians.
    # A horrible bunch of magic numbers - do not imitate!
    x = np.arange(30)
    spike_template = np.exp(-(x - 10)**2/6) - np.exp(-(x - 12)**2/16)*0.8

    # Draw the number of spikes in every sample from a Poisson distribution
    # with an expected firing_rate / FS spikes per sample:
    spikes_times = rng.poisson(firing_rate / FS, n)

    # Convolve dirac delta functions of spike times with spike template
    # (cropping the tail that the convolution adds past the last sample):
    trace = np.convolve(spikes_times, spike_template)[:n]

    # Add some gaussian noise:
    trace += rng.normal(0, noise_sigma, n)

    return trace