# NumPy - Exercise Complete Baseball

In this exercise we use Python with the `numpy` and `pandas` libraries to analyze a baseball dataset. With `pandas` we read the dataset into a DataFrame and extract the players' weights and heights. Converting those columns to NumPy arrays lets us perform basic array operations, access individual elements and slices, and build a 2D array for further analysis. We then inspect the array's properties, compute basic statistics on height (mean, median, and standard deviation), and measure the correlation between height and weight. The notebook serves as a hands-on, interactive guide to data manipulation and basic statistics in Python, set in the context of baseball analytics.


In [20]:
# Import necessary libraries
import pandas as pd
import numpy as np

# Step 1: Load the MLB (Baseball) data file into a pandas DataFrame
mlb = pd.read_csv("MLB (Baseball).txt")
print(mlb.head(3))  # preview: first 3 rows only

# Step 2: Pull the 'Height' and 'Weight' columns out as plain Python lists
height_in, weight_lb = (
    mlb[column].tolist() for column in ('Height', 'Weight')
)

# Step 3: Turn both lists into NumPy arrays
np_weight_lb = np.asarray(weight_lb)
np_height_in = np.asarray(height_in)

# Step 4: Look at single elements and slices.
# NumPy positions are 0-based, so position 49 is the 50th entry and the
# slice 99:110 covers the 100th through 110th entries (11 values).
fiftieth_weight = np_weight_lb[49]
heights_100_to_110 = np_height_in[99:110]
print("Weight at index 50:", fiftieth_weight)
print("Height from index 100 to 110:", heights_100_to_110)

# Step 5: Build a 2D array straight from the DataFrame
# (column 0 = 'Height', column 1 = 'Weight', in the order selected here)
np_baseball = mlb.loc[:, ['Height', 'Weight']].to_numpy()

# Step 6: Inspect np_baseball
print("Type of np_baseball:", type(np_baseball))
print("Shape of np_baseball:", np_baseball.shape)
print("50th row of np_baseball:", np_baseball[49])
print("Height of 124th player:", np_baseball[123][0])

# Step 7: Height statistics (the same approach works for weight, column 1)
heights = np_baseball[:, 0]
weights = np_baseball[:, 1]
avg_height = heights.mean()
med_height = np.median(heights)
stddev_height = heights.std()
# corrcoef returns a 2x2 matrix; the off-diagonal entry is the
# height/weight correlation.
corr_height_weight = np.corrcoef(heights, weights)

# Step 8: Report the height statistics (the same approach works for weight)
for label, statistic in (
    ("Average Height:", avg_height),
    ("Median Height:", med_height),
    ("Standard Deviation of Height:", stddev_height),
    ("Correlation between Height and Weight:", corr_height_weight[0, 1]),
):
    print(label, statistic)

# Step 9: Same mean/median again, this time from the 1D height array
print("Mean of Height:", np_height_in.mean())
print("Median of Height:", np.median(np_height_in))


              Name Team Position  Height  Weight    Age PosCategory
0    Adam_Donachie  BAL  Catcher      74     180  22.99     Catcher
1        Paul_Bako  BAL  Catcher      74     215  34.69     Catcher
2  Ramon_Hernandez  BAL  Catcher      72     210  30.78     Catcher
Weight at index 50: 195
Height from index 100 to 110: [74 73 74 72 73 69 72 73 75 75 73]
Type of np_baseball: <class 'numpy.ndarray'>
Shape of np_baseball: (1015, 2)
50th row of np_baseball: [ 70 195]
Height of 124th player: 75
Average Height: 73.6896551724138
Median Height: 74.0
Standard Deviation of Height: 2.312791881046546
Correlation between Height and Weight: 0.5315393226146092
Mean of Height: 73.6896551724138
Median of Height: 74.0


# if - else statements

# Common for-loop operations

In [2]:
# Iterate over the items of a list
numbers = [1, 2, 3, 4, 5]
print("Loop through a list of numbers:")
for item in numbers:
    print(item)

# Iterate over the integers produced by range (0 up to, not including, 5)
print("\nLoop through a range of numbers:")
for n in range(0, 5):
    print(n)

# Iterate over a string one character at a time
print("\nLoop through characters in a string:")
for letter in "Python":
    print(letter)

# Iterate over a dictionary's (key, value) pairs
print("\nLoop through key-value pairs in a dictionary:")
person = {'name': 'John', 'age': 30, 'city': 'New York'}
for field, entry in person.items():
    print(f"{field} {entry}")

# enumerate yields (position, element) pairs, starting at position 0
print("\nLoop through elements with index using enumerate:")
fruits = ['apple', 'banana', 'cherry']
for position, fruit_name in enumerate(fruits):
    print(position, fruit_name)

# A loop inside a loop: the inner loop runs fully for each outer value
print("\nNested loops:")
for outer in range(3):
    for inner in range(3):
        print(outer, inner)

# 'continue' jumps straight to the next iteration
print("\nContinue statement to skip iteration:")
numbers = [1, 2, 3, 4, 5]
for item in numbers:
    if item == 3:
        continue  # nothing below runs for this item
    print(item)

# The 'else' clause of a loop runs only when the loop was not ended by 'break'
print("\nLoop with else statement:")
for n in range(5):
    print(n)
else:
    print("Loop completed without a break.")


Loop through a list of numbers:
1
2
3
4
5

Loop through a range of numbers:
0
1
2
3
4

Loop through characters in a string:
P
y
t
h
o
n

Loop through key-value pairs in a dictionary:
name John
age 30
city New York

Loop through elements with index using enumerate:
0 apple
1 banana
2 cherry

Nested loops:
0 0
0 1
0 2
1 0
1 1
1 2
2 0
2 1
2 2

Continue statement to skip iteration:
1
2
4
5

Loop with else statement:
0
1
2
3
4
Loop completed without a break.


In [24]:
# Examples: test a condition and act on the result

# Scenario 1: Numeric Comparison
x = 10
if x < 5:
    print("x is less than 5")
elif x == 5:
    print("x is equal to 5")
else:
    print("x is greater than 5")

# Scenario 2: String Comparison
country = "USA"
if country == "Canada":
    print("Country is Canada")
elif country == "USA":
    print("Country is United States")
else:
    print("Country is neither USA nor Canada")

# Scenario 3: Checking for Membership
fruits = ['apple', 'banana', 'orange']
fruit_to_check = 'banana'
if fruit_to_check not in fruits:
    print(f"{fruit_to_check} is not in the list")
else:
    print(f"{fruit_to_check} is in the list")

# Scenario 4: Logical Conditions (both parts must hold)
age = 25
income = 50000
is_adult = age > 18
earns_enough = income > 30000
if is_adult and earns_enough:
    print("Eligible for loan")
else:
    print("Not eligible for loan")

# Scenario 5: Nested if-else
temperature = 28
if temperature <= 30:
    print("Normal or low temperature detected")
    # Only checked when the temperature is not high
    if temperature < 0:
        print("Extreme cold temperature detected")
else:
    print("High temperature detected")

# Scenario 6: Ternary Operator (conditional expression)
x = 10
message = "Odd" if x % 2 else "Even"  # a non-zero remainder is truthy
print(message)

# Scenario 7: Checking for None (identity test, not equality)
value = None
if value is not None:
    print("Value is not None")
else:
    print("Value is None")

# Scenario 8: Using not in conditions
is_raining = False
if not is_raining:
    print("No need for an umbrella")
else:
    print("Bring an umbrella")

# Scenario 9: Multiple conditions using or (either part may hold)
day = "Saturday"
if day == "Sunday" or day == "Saturday":
    print("It's the weekend!")
else:
    print("It's a weekday")

# Scenario 10: Multiple conditions using and (strictly between 20 and 30)
temperature = 25
if 20 < temperature and temperature < 30:
    print("Temperature is comfortable")
else:
    print("Temperature is not in the comfort range")


x is greater than 5
Country is United States
banana is in the list
Eligible for loan
Normal or low temperature detected
Even
Value is None
No need for an umbrella
It's the weekend!
Temperature is comfortable


# While loops driven by conditions

In [3]:
# Basic while loop: repeat until the counter reaches 5
print("Simple While Loop:")
count = 0
while count < 5:
    print(count)
    count = count + 1

# while ... else: the else clause runs when the condition becomes false
# (it would be skipped if the loop were left via 'break')
print("\nUsing else with While Loop:")
count = 0
while count < 5:
    print(count)
    count = count + 1
else:
    print("Loop completed without a break.")

# 'continue' in a while loop: the counter is advanced first, so skipping
# the print for 3 cannot stall the loop
print("\nWhile Loop with continue:")
count = 0
while count < 5:
    count = count + 1
    if count == 3:
        continue  # go back to the condition check without printing
    print(count)

# 'break' leaves the loop immediately
print("\nUsing break to Exit Loop Prematurely:")
count = 0
while count < 5:
    if count == 3:
        break  # stop as soon as count reaches 3
    print(count)
    count = count + 1

# Loop driven by a changing measurement
print("\nWhile Loop with Condition:")
temperature = 3
while temperature < 10:
    print(f"Temperature is {temperature}°C. Heating...")
    temperature = temperature + 1

# Same idea with an else clause reporting normal completion
print("\nUsing else with a Condition:")
temperature = 0
while temperature < 100:
    print(f"Temperature is {temperature}°C. Heating...")
    temperature = temperature + 10
else:
    print("Reached maximum temperature.")


Simple While Loop:
0
1
2
3
4

Using else with While Loop:
0
1
2
3
4
Loop completed without a break.

While Loop with continue:
1
2
4
5

Using break to Exit Loop Prematurely:
0
1
2

While Loop with Condition:
Temperature is 3°C. Heating...
Temperature is 4°C. Heating...
Temperature is 5°C. Heating...
Temperature is 6°C. Heating...
Temperature is 7°C. Heating...
Temperature is 8°C. Heating...
Temperature is 9°C. Heating...

Using else with a Condition:
Temperature is 0°C. Heating...
Temperature is 10°C. Heating...
Temperature is 20°C. Heating...
Temperature is 30°C. Heating...
Temperature is 40°C. Heating...
Temperature is 50°C. Heating...
Temperature is 60°C. Heating...
Temperature is 70°C. Heating...
Temperature is 80°C. Heating...
Temperature is 90°C. Heating...
Reached maximum temperature.


# Complete Cars

This exercise showcases common operations on Pandas DataFrames: loading data, selecting columns, slicing rows, and label-based indexing. It is a practical example for understanding and manipulating tabular data with Pandas.

In [18]:
# Import necessary libraries
import pandas as pd
from IPython import InteractiveShell

# Configure IPython to display the result of every expression in a cell
InteractiveShell.ast_node_interactivity = 'all'

# Load cars.csv into the DataFrame 'cars', using the first CSV column as index
cars = pd.read_csv('cars.csv', index_col=0)
# NOTE(review): the recorded output of this cell shows a NaN index and the
# country codes in an 'Unnamed: 0' column, so the first CSV column is
# presumably empty -- confirm the file layout; the label-based (.loc)
# examples further down expect the country codes to be the index.

# Show the whole DataFrame
print(cars)

# Single brackets with one column name -> Pandas Series
country_series = cars['country']
print(country_series)

# Double brackets (a list of names) -> Pandas DataFrame, even for one column
country_frame = cars[['country']]
print(country_frame)

# Two columns selected by name -> DataFrame with 'country' and 'drives_right'
selected_columns = ['country', 'drives_right']
print(cars[selected_columns])

# --- Further exercises, currently disabled ---

# First 3 observations in the DataFrame
# print(cars[0:3])

# Fourth, fifth, and sixth observations in the DataFrame
# print(cars[3:6])

# Observation for Japan using label-based indexing (loc)
# print(cars.loc['JAP'])

# Observations for Australia and Egypt using label-based indexing
# print(cars.loc[['AUS', 'EG']])

# 'drives_right' value for Morocco using label-based indexing
# print(cars.loc['MOR', 'drives_right'])

# Sub-DataFrame for Russia and Morocco with selected columns
# print(cars.loc[['RU', 'MOR'], ['country', 'drives_right']])

# 'drives_right' column as a Pandas Series
# print(cars.loc[:, 'drives_right'])

# 'drives_right' column as a Pandas DataFrame
# print(cars.loc[:, ['drives_right']])

# 'cars_per_cap' and 'drives_right' columns as a DataFrame
# print(cars.loc[:, ['cars_per_cap', 'drives_right']])


    Unnamed: 0  cars_per_cap        country  drives_right
NaN         US           809  United States          True
NaN        AUS           731      Australia         False
NaN        JAP           588          Japan         False
NaN         IN            18          India         False
NaN         RU           200         Russia          True
NaN        MOR            70        Morocco          True
NaN         EG            45          Egypt          True
NaN    United States
NaN        Australia
NaN            Japan
NaN            India
NaN           Russia
NaN          Morocco
NaN            Egypt
Name: country, dtype: object
           country
NaN  United States
NaN      Australia
NaN          Japan
NaN          India
NaN         Russia
NaN        Morocco
NaN          Egypt
           country  drives_right
NaN  United States          True
NaN      Australia         False
NaN          Japan         False
NaN          India         False
NaN         Russia          True
NaN       