# <span style="color:red; font-size:30px"> Supplement: More on Lists and Arrays</span>

In [None]:
import numpy as np 

<font size = "4">

You can access elements of a list or array using negative indices.

The last element corresponds to -1, the second-to-last element corresponds to -2, etc.

In [None]:
# A list of movie titles used to demonstrate positive and negative indexing.
favorite_movies = [
    "Rear Window",
    "Inception",
    "Dead Poets Society",
    "Baby Driver",
    "Evil Dead (2013 remake)",
]

# Non-negative indices count from the start of the list, beginning at 0.
print(favorite_movies[0]) # first element
print(favorite_movies[1]) # second element
print(favorite_movies[2]) # third element
print() # blank line separating the two groups of output
# Negative indices count backwards from the end of the list, beginning at -1.
print(favorite_movies[-1]) # last element
print(favorite_movies[-2]) # second-to-last element
print(favorite_movies[-3]) # third-to-last element (same item as index 2 in a 5-element list)

In [None]:
# Negative indices work on NumPy arrays the same way they do on lists.
z = np.array([5, 4, 37, 92, 209])

print(z[-1]) # index -1 -> final entry
print(z[-2]) # index -2 -> the entry just before the final one

<font size = "4">

You can add new elements to a list using the `.append` method.

In [None]:
# A list may hold a mix of types (here: integers and strings).
my_list = [7, 5, "Emory", -7, "Atlanta"]
print(my_list) # contents before appending

# .append changes the list in place: the new item is placed after the current last element.
my_list.append("Department of Data and Decision Sciences")
print(my_list) # contents after appending

<font size = "4">

You can start with an empty list, then append new elements.

In [None]:
# Begin with a list that contains no elements.
my_list = []
print(my_list)

# Each call to .append grows the list by exactly one element.
my_list.append(1)
print(my_list) # one element

my_list.append("abcd")
print(my_list) # two elements

my_list.append(33)
print(my_list) # three elements


<font size = "4">

Sorting lists with the `.sort` method

In [None]:
# One list of numbers and one list of names, both initially unordered.
numerical_list = [5.3, 25, -0.1, -5, 11, 3, 1.2]
string_list = ["Frank", "Zev", "Peter", "Abigail", "Ali", "Eleanor"]

# .sort reorders each list in place: ascending for the numbers,
# alphabetical for these capitalized names.
string_list.sort()
numerical_list.sort()

# Both lists now hold their elements in sorted order.
print(numerical_list)
print(string_list)

<font size = "4">

**Copying lists**

Remember that the assignment operator doesn't make a copy if you put a list on the right-hand side:

In [None]:
list_of_floats = [2.3, -1.0, 5.1, 9.9]

# Plain assignment binds a second name to the SAME list object; no copy is made.
not_a_copy = list_of_floats

# Changing the list through one name ...
list_of_floats[0] = "Nothing"

# ... is visible through the other name as well.
print(not_a_copy)

<font size = "4">

If you want to make a copy, use the `.copy` method

In [None]:
list_of_floats = [2.3, -1.0, 5.1, 9.9]

# .copy builds a new, separate list holding the same elements.
copy_of_list = list_of_floats.copy()

# Modifying the original list afterwards ...
list_of_floats[0] = "Nothing"

# ... leaves the copy untouched.
print(copy_of_list)

<font size = "4">

**Other list methods**

See [here](https://docs.python.org/3/tutorial/datastructures.html) for all available list methods.
Here are two more useful ones:

In [None]:
# .reverse flips the order of the elements in place (the last item becomes the first).
colors = ["blue", "green", "red", "orange", "pink", "purple"]
colors.reverse()

print(colors) # the list now starts with "purple" and ends with "blue"

In [None]:
# .count(item) returns how many elements of the list are equal to item.
fav_colors = [
    "blue", "pink", "black",
    "pink", "red", "blue",
    "green", "black", "pink",
]
pink_count = fav_colors.count("pink")

print(pink_count) # "pink" occurs 3 times above, so this prints 3

## <span style="color:red; font-size:25px"> NumPy Arrays </span>

<font size = "4">

Be careful when making a copy: just as with lists, plain assignment does not copy a NumPy array.

In [None]:
x = np.array([1.2, 2.4, 4.8, 9.6])

# Plain assignment: both names refer to the same underlying array.
not_a_copy = x

# np.copy returns a new array holding the same values as x.
copy_of_x = np.copy(x)

# Overwrite the first entry of the original array.
x[0] = -24

print(not_a_copy) # shows the change, because it is the same array as x
print(copy_of_x) # still holds the original values

<font size = "4">

**Append a single value**

In [None]:
z = np.array([5.1, 3.0, -2, 4.4])
# np.append returns a NEW array with the value added at the end; it does not
# change z in place, so the result is assigned back to z.
z = np.append(z, 12)
print(z)

<font size = "4">

**Append multiple values**

In [None]:
z = np.array([5.1, 3.0, -2, 4.4])
# Passing a list as the second argument appends all of its values, in order.
z = np.append(
    z,
    [13.1, 0.0, -1.0, 2.0],
)
print(z)

<font size = "4">

**Sorting arrays**

Sort the original array

In [None]:
z = np.array([5.1, 3.0, -2, 4.4])

# The .sort method rearranges z itself into ascending order (nothing is returned).
z.sort()

print(z) # z is now sorted

<font size = "4">

Make a sorted copy

In [None]:
z = np.array([5.1, 3.0, -2, 4.4])

# np.sort returns a sorted copy; the array passed in keeps its original order.
z_sorted = np.sort(z)

print(z_sorted) # ascending order
print(z) # original order, unchanged

<font size = "4">

**Computing summary statistics of a NumPy array**

Method 1:

In [None]:
c_vec = np.array([4.2, 1.2, 11.45, -2, 0, 9, 3.3, 2 * np.pi])

# Method 1: pass the array to NumPy's top-level functions.
print(np.mean(c_vec)) # arithmetic mean
print(np.std(c_vec)) # standard deviation (population form: NumPy's default is ddof=0)
print(np.median(c_vec)) # median
print(np.max(c_vec)) # largest value
print(np.min(c_vec)) # smallest value

<font size = "4">

Method 2:

In [None]:
c_vec = np.array([4.2, 1.2, 11.45, -2, 0, 9, 3.3, 2 * np.pi])

# Method 2: call the equivalent methods directly on the array.
print(c_vec.mean()) # arithmetic mean
print(c_vec.std()) # standard deviation

# NumPy arrays have no .median() method, so the median is not printed here;
# use the function form np.median(c_vec) when you need it.

print(c_vec.max()) # largest value
print(c_vec.min()) # smallest value

## <span style="color:red; font-size:25px"> NumPy Arrays of Random Numbers </span>

<font size = "4">

Samples of random variables can be generated using the `numpy.random` submodule.

In [None]:
import numpy as np

# Draw one value from a standard Normal distribution,
# i.e. a Normal distribution with mean 0 and standard deviation 1.
# Called with no arguments, np.random.normal returns a single float.

normal_sample = np.random.normal()
print(normal_sample)

In [None]:
# The "size" argument asks for several draws at once; they come back as a NumPy array.
# Here: 10 draws from a standard Normal distribution.

normal_sample = np.random.normal(size = 10)
print(normal_sample) # the 10 sampled values
print(type(normal_sample)) # confirms the result is a numpy.ndarray

In [None]:
# The "loc" argument sets the mean of the normal distribution
# ("loc" refers to the *location* of the mean).

# One draw from a normal distribution whose mean is 1
single_sample = np.random.normal(loc = 1)
print(single_sample)

# Seven draws from that same distribution, returned together as an array
seven_samples = np.random.normal(loc = 1, size = 7)
print(seven_samples)

In [None]:
# The "scale" argument sets the standard deviation of the normal distribution.
# It can be combined with "loc" (mean) and "size" (number of draws).

# standard deviation = 2.4, one draw
single_sample = np.random.normal(scale = 2.4)
print(single_sample)

# standard deviation = 2.4, six draws
six_samples = np.random.normal(scale = 2.4, size = 6)
print(six_samples)

# mean = -1 and standard deviation = 2.4, one draw
single_sample_2 = np.random.normal(loc = -1, scale = 2.4)
print(single_sample_2)

# mean = -1 and standard deviation = 2.4, five draws
five_samples = np.random.normal(loc = -1, scale = 2.4, size = 5)
print(five_samples)

##### **Run the next cell 3 times in a row.** Notice that the random numbers change each time

In [None]:
# Run this cell 3 times: no seed is set here, so the four printed values change on each run.
print(
    np.random.normal(loc = 0.5, scale = 3, size = 4)
)

##### In order to reproduce results, we often want the same set of random numbers to be generated.

##### We can do this by setting a **seed**: an integer that initializes the random number generator, so that the same sequence of "random" numbers is produced every time.

##### **Run the next cell 3 times in a row.** Notice that the numbers stay the same

In [None]:
# Run this cell 3 times: the seed is reset to 0 at the start of every run,
# so the same four values are printed each time.
np.random.seed(0)
print(
    np.random.normal(loc = 0.5, scale = 3, size = 4)
)

##### **Run the next cell 3 times in a row.** Notice that the numbers stay the same, but they are different from those in the previous cell

In [None]:
# A different seed gives a different, but equally repeatable, set of numbers.
# Any non-negative integer works as a seed: try 1, 2, 3, 100, 431, 981423, etc.

# Run this cell 3 times: the printed values stay the same on every run.
np.random.seed(3497)
print(
    np.random.normal(loc = 0.5, scale = 3, size = 4)
)

#### Once we generate random variables from a distribution, we can create a **histogram**. This is a visual representation of the data, showing the frequency within ranges (known as bins)

#### Below, we generate a histogram of 1,000 samples of a standard normal variable using `matplotlib.pyplot`.

#### We set the seed so that the code is reproducible, i.e., the same values are drawn every time.

In [None]:
import matplotlib.pyplot as plt

# Fix the seed so the same values (and therefore the same histogram) are produced on every run.
np.random.seed(1989)

# Number of values to draw; the plot title below is built from this number.
sample_size = 1000

# Draw sample_size values from a standard normal distribution (mean 0, standard deviation 1).
standard_normal_samples = np.random.normal(size = sample_size)

# plt.hist groups the samples into bins and draws one bar per bin.
plt.hist(x = standard_normal_samples)
plt.xlabel("Values drawn from Distribution")
plt.ylabel("Frequency")
# Format sample_size with a thousands separator (1000 -> "1,000") so the title
# stays correct if sample_size is changed.
plt.title(f"Histogram of standard normal variable ({sample_size:,} samples)")
plt.show()