In [10]:
import sys
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt
import re
from collections import deque
import sklearn

### count numbers less than a value from a sorted list

In [5]:
def count_numbers(sorted_list, less_than):
    '''
    takes in a sorted list of unique integers and an integer less_than
    counts the number of list elements that are less than the parameter less_than
    '''
    start = 0
    end = len(sorted_list) -1
    while start <= end:
        mid = (start + end) // 2
        
        if sorted_list[mid] < less_than:
            # Move the start to mid + 1 since we know elements before mid are valid
            start = mid + 1
        else:
            # Move the end to mid - 1 since sorted_list[mid] is >= less_than
            end = mid - 1

    # At this point, 'start' will be the count of elements less than 'less_than'
    return start

if __name__ == "__main__":
    sorted_list = [1, 3, 5, 7]
    print(count_numbers(sorted_list, 4)) # should print 2
    sorted_list = [1, 3, 4, 5, 7]
    print(count_numbers(sorted_list, 4)) # should print 2

2
2


### Reverse dictionary
- Accepts a dict containing the file owner name for each file name
- returns a dict containing a list of file names for each owner name, in any order

In [1]:
def group_by_owners(files):
    new_dict = {}
    for key, value in files.items():
        if value in new_dict:
            new_dict[value].append(key)
        else:
            new_dict[value] = [key]
    return new_dict

if __name__ == "__main__":    
    files = {
        'Input.txt': 'Randy',
        'Code.py': 'Stan',
        'Output.txt': 'Randy'
    }   
    print(group_by_owners(files))

{'Randy': ['Input.txt', 'Output.txt'], 'Stan': ['Code.py']}


### Sliding window

In [4]:
def simulate(entries):
    """
    Simulate the malware behavior on the input entries.
    
    :param entries: (list(int)) The numerical record files
    :returns: (list(int)) The record files after running the malware
    Tl, -, -, X, -, -, -, Tr
    """
    zero_idx = []
    offset1 = 3  # constant offset for Tl and Tr relative to X
    offset2 = 4

    # First pass to find all the indices that need to be zeroed
    for i in range(len(entries)): # 0-9
        tl_index = i - offset1  # Calculate the index for Tl -3
        tr_index = i + offset2  # Calculate the index for Tr 4
        
        should_zero = False

        # If Tl is within bounds, check if it is greater than or equal to X
        if tl_index >= 0 and entries[tl_index] >= entries[i]:
            should_zero = True
        
        # If Tr is within bounds, check if it is greater than or equal to X
        if tr_index < len(entries) and entries[tr_index] >= entries[i]:
            should_zero = True

        # If any condition is true, mark the index 'i' for zeroing
        if should_zero:
            zero_idx.append(i)

    # Second pass to zero the identified positions
    for idx in zero_idx:
        entries[idx] = 0

    return entries

# Example usage
records = [1, 2, 0, 5, 0, 2, 4, 3, 3, 3]
print(simulate(records))

# Expected output
# [1, 0, 0, 5, 0, 0, 0, 3, 3, 0]

[1, 0, 0, 5, 0, 0, 0, 3, 3, 0]


### Linear Regression

In [15]:
import numpy as np
from sklearn.linear_model import LinearRegression

def desired_marketing_expenditure(marketing_expenditure, units_sold, desired_units_sold):
    """
    :param marketing_expenditure: (list) A list of integers with the expenditure for each previous campaign.
    :param units_sold: (list) A list of integers with the number of units sold for each previous campaign.
    :param desired_units_sold: (integer) Target number of units to sell in the new campaign.
    :returns: (float) Required amount of money to be invested.
    """
    # Reshape data for sklearn
    marketing_expenditure = np.array(marketing_expenditure).reshape(-1, 1)
    units_sold = np.array(units_sold).reshape(-1, 1)

    # Create and fit the linear regression model
    model = LinearRegression()
    model.fit(units_sold, marketing_expenditure)
    
    # Predict marketing expenditure for desired units sold
    required_expenditure = model.predict(np.array([[desired_units_sold]]))
    
    return float(required_expenditure[0][0])

# Example
print(desired_marketing_expenditure(
    [300000, 200000, 400000, 300000, 100000],
    [60000, 50000, 90000, 80000, 30000],
    60000))


250877.19298245612


In [18]:
a = [300000, 200000, 400000, 300000, 100000]
y = np.array(a).reshape(-1, 1)
y

array([[300000],
       [200000],
       [400000],
       [300000],
       [100000]])

In [17]:
b=[60000, 50000, 90000, 80000, 30000]
X = np.array(b).reshape(-1, 1)


In [14]:
# Create and fit the linear regression model
model = LinearRegression()
model.fit(X, y)

# Predict marketing expenditure for desired units sold
output = model.predict(np.array([[60000]]))
float(output[0][0])

250877.19298245612

### Modify dataframe in place

In [56]:
import pandas as pd
import numpy as np

def login_table(id_name_verified, id_password):
    """
    :param id_name_verified: (DataFrame) DataFrame with columns: Id, Login, Verified.   
    :param id_password: (numpy.array) Two-dimensional NumPy array where each element
                        is an array that contains: Id and Password
    :returns: (None) The function should modify id_name_verified DataFrame in-place. 
              It should not return anything.
    """
    # Drop 'Verified' column in place
    id_name_verified.drop(['Verified'], axis=1, inplace=True)
    
    # Create a DataFrame from id_password array
    df_password = pd.DataFrame(id_password, columns=['Id', 'Password'])


    # Merge the original DataFrame with the password DataFrame in place
    id_name_verified.set_index('Id', inplace=True)
    df_password.set_index('Id', inplace=True)
    
    # Update id_name_verified with the 'Password' column
    id_name_verified['Password'] = df_password['Password']

    # Reset index if needed
    id_name_verified.reset_index(inplace=True)
    pass

# Example usage
id_name_verified = pd.DataFrame([[1, "JohnDoe", True], [2, "AnnFranklin", False]], columns=["Id", "Login", "Verified"])
id_password = np.array([[1, 987340123], [2, 187031122]], np.int32)

login_table(id_name_verified, id_password)
print(id_name_verified)



   Id        Login   Password
0   1      JohnDoe  987340123
1   2  AnnFranklin  187031122


### panda dataframe functions percentage changes and correlation 

In [2]:
import pandas as pd
import numpy as np

def most_corr(prices):
    """
    :param prices: (pandas.DataFrame) A dataframe containing each ticker's 
                   daily closing prices.
    :returns: (container of strings) A container, containing the two tickers that 
              are the most highly (linearly) correlated by daily percentage change.
    """
    # Calculate daily percentage change using pandas' vectorized function
    percentage_changes = prices.pct_change().dropna()

    # Compute the correlation matrix directly using pandas' corr() method
    corr_matrix = percentage_changes.corr()

    # Set the diagonal to 0 to avoid self-correlation
    np.fill_diagonal(corr_matrix.values, 0)

    # Find the indices of the maximum correlation value
    idx = np.unravel_index(np.argmax(corr_matrix.values), corr_matrix.shape)
    
    # Return the two most correlated tickers
    return prices.columns[idx[0]], prices.columns[idx[1]]


#For example, the code below should print: ('FB', 'MSFT')
print(most_corr(pd.DataFrame.from_dict({
    'GOOG' : [
        742.66, 738.40, 738.22, 741.16,
        739.98, 747.28, 746.22, 741.80,
        745.33, 741.29, 742.83, 750.50
    ],
    'FB' : [
        108.40, 107.92, 109.64, 112.22,
        109.57, 113.82, 114.03, 112.24,
        114.68, 112.92, 113.28, 115.40
    ],
    'MSFT' : [
        55.40, 54.63, 54.98, 55.88,
        54.12, 59.16, 58.14, 55.97,
        61.20, 57.14, 56.62, 59.25
    ],
    'AAPL' : [
        106.00, 104.66, 104.87, 105.69,
        104.22, 110.16, 109.84, 108.86,
        110.14, 107.66, 108.08, 109.90
    ]
})))

('FB', 'MSFT')


In [3]:
prices = pd.DataFrame.from_dict({
    'GOOG' : [
        742.66, 738.40, 738.22, 741.16,
        739.98, 747.28, 746.22, 741.80,
        745.33, 741.29, 742.83, 750.50
    ],
    'FB' : [
        108.40, 107.92, 109.64, 112.22,
        109.57, 113.82, 114.03, 112.24,
        114.68, 112.92, 113.28, 115.40
    ],
    'MSFT' : [
        55.40, 54.63, 54.98, 55.88,
        54.12, 59.16, 58.14, 55.97,
        61.20, 57.14, 56.62, 59.25
    ],
    'AAPL' : [
        106.00, 104.66, 104.87, 105.69,
        104.22, 110.16, 109.84, 108.86,
        110.14, 107.66, 108.08, 109.90
    ]
})
prices

Unnamed: 0,GOOG,FB,MSFT,AAPL
0,742.66,108.4,55.4,106.0
1,738.4,107.92,54.63,104.66
2,738.22,109.64,54.98,104.87
3,741.16,112.22,55.88,105.69
4,739.98,109.57,54.12,104.22
5,747.28,113.82,59.16,110.16
6,746.22,114.03,58.14,109.84
7,741.8,112.24,55.97,108.86
8,745.33,114.68,61.2,110.14
9,741.29,112.92,57.14,107.66


In [5]:
percentage_changes = prices.pct_change().dropna()

In [7]:
corr_matrix = percentage_changes.corr()
corr_matrix

Unnamed: 0,GOOG,FB,MSFT,AAPL
GOOG,1.0,0.834381,0.84752,0.858727
FB,0.834381,1.0,0.878943,0.872889
MSFT,0.84752,0.878943,1.0,0.861793
AAPL,0.858727,0.872889,0.861793,1.0


### Classifier

In [16]:
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier

def train_and_predict(train_input_features, train_outputs, prediction_features):
    """
    :param train_input_features: (numpy.array) A two-dimensional NumPy array where each element
                        is an array that contains: sepal length, sepal width, petal length, and petal width   
    :param train_outputs: (numpy.array) A one-dimensional NumPy array where each element
                        is a number representing the species of iris which is described in
                        the same row of train_input_features. 0 represents Iris setosa,
                        1 represents Iris versicolor, and 2 represents Iris virginica.
    :param prediction_features: (numpy.array) A two-dimensional NumPy array where each element
                        is an array that contains: sepal length, sepal width, petal length, and petal width
    :returns: (list) The function should return an iterable (like list or numpy.ndarray) of the predicted 
                        iris species, one for each item in prediction_features
    """   
    model = DecisionTreeClassifier()
    model.fit(train_input_features, train_outputs)
    y_pred = model.predict(prediction_features)
    return y_pred

iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    test_size=0.3, random_state=0)

y_pred = train_and_predict(X_train, y_train, X_test)
if y_pred is not None:
    print(metrics.accuracy_score(y_test, y_pred))

0.9777777777777777


In [9]:
from sklearn.datasets import load_iris
import pandas as pd

# Load the Iris dataset
iris = load_iris()
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [13]:
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                    test_size=0.3, random_state=0)


In [15]:
y_train

array([1, 2, 2, 2, 2, 1, 2, 1, 1, 2, 2, 2, 2, 1, 2, 1, 0, 2, 1, 1, 1, 1,
       2, 0, 0, 2, 1, 0, 0, 1, 0, 2, 1, 0, 1, 2, 1, 0, 2, 2, 2, 2, 0, 0,
       2, 2, 0, 2, 0, 2, 2, 0, 0, 2, 0, 0, 0, 1, 2, 2, 0, 0, 0, 1, 1, 0,
       0, 1, 0, 2, 1, 2, 1, 0, 2, 0, 2, 0, 0, 2, 0, 2, 1, 1, 1, 2, 2, 1,
       1, 0, 1, 2, 2, 0, 1, 1, 1, 1, 0, 0, 0, 2, 1, 2, 0])

In [1]:
import pandas as pd
import numpy as np

# Load the data (replace with actual file loading if needed)
data = """TRUE,91
TRUE,23
TRUE,76
FALSE,48
FALSE,36
FALSE,36
TRUE,92
TRUE,88
TRUE,51
FALSE,10
FALSE,28
FALSE,62
TRUE,75
TRUE,80
TRUE,54
FALSE,72
FALSE,22
FALSE,50
FALSE,12
TRUE,29
TRUE,59
TRUE,78
FALSE,32
TRUE,93"""

In [2]:
data

'TRUE,91\nTRUE,23\nTRUE,76\nFALSE,48\nFALSE,36\nFALSE,36\nTRUE,92\nTRUE,88\nTRUE,51\nFALSE,10\nFALSE,28\nFALSE,62\nTRUE,75\nTRUE,80\nTRUE,54\nFALSE,72\nFALSE,22\nFALSE,50\nFALSE,12\nTRUE,29\nTRUE,59\nTRUE,78\nFALSE,32\nTRUE,93'

In [5]:
# Create a DataFrame
from io import StringIO
df = pd.read_csv(StringIO(data), header=None, names=["Actual", "Score"])
df

Unnamed: 0,Actual,Score
0,True,91
1,True,23
2,True,76
3,False,48
4,False,36
5,False,36
6,True,92
7,True,88
8,True,51
9,False,10


In [9]:
df['True_label_Num'] = df['Actual'].apply(lambda x: 1 if x == 'TRUE' else 0)

# Find the optimal threshold
unique_scores = sorted(df['Score'].unique())  # Get unique score values


# Define a function to calculate accuracy at a given threshold
def calculate_accuracy(boundary):
    # Predict True for scores >= threshold, False otherwise
    df['Predicted'] = df['Score'].apply(lambda x:1 if x>= boundary else 0)
    correct_predictions = (df['Predicted'] == df['True_label_Num']).sum()
    # Calculate accuracy
    accuracy = correct_predictions/len(df)
    return accuracy


best_threshold = None
best_accuracy = 0

# Loop through each unique score and calculate accuracy
for threshold in unique_scores:
    accuracy = calculate_accuracy(threshold)
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_threshold = threshold

print(f"Best threshold: {best_threshold}, Best accuracy: {best_accuracy}")


Best threshold: 93, Best accuracy: 0.9583333333333334


### if condition, platform changes speed

In [1]:
def calculate_final_speed(initial_speed, inclinations):
        
    # check the initial speed is a positive number
    if initial_speed == 0:
        return 0
    # # check the inclinations are numbers
    # for i in inclinations:
    #     if not isinstance(i, (int, float)):
    #         raise ValueError("Inclinations must be numbers")
    # check the inclinations are between -90 and 90
    # if not all(-90 <= i <= 90 for i in inclinations):
    #     raise ValueError("Inclinations must be between -90 and 90")
    final_speed = initial_speed
    # speed will decrease proportionally to the sin of the inclination (if inclination is positive - uphill, if 30, then 1/2 the original speed)
    # speed will increase as the angle is negative (downhill, if -45 degree, then + 45 to previous speed))
    # speed will be 0 if the inclination is 90
    # speed will be 0 if the inclination is -90
    # speed will be no changes if the inclination is 0  
    for i in inclinations:
        if i == 90 or i == -90:
            final_speed = 0
            break
        elif i > 0:  # Uphill, reduce speed based on sine of the angle
            final_speed -= i
        elif i < 0:  # Downhill, increase speed by the absolute value of the inclination
            final_speed += abs(i)

        # Ensure speed does not go below zero
        if final_speed < 0:
            final_speed = 0

    return final_speed



print(calculate_final_speed(60, [0, 30, 0, -45, 0]))

75


In [2]:
print(calculate_final_speed(60, [0, 30, 0, -45, 90]))

0


### if condition, discount

In [3]:
from enum import Enum, auto

class DiscountType(Enum):
    STANDARD = auto()
    # any weight, 6% discount
    SEASONAL = auto()
    # any weight, 12% discount
    WEIGHT = auto()
    # if weight <= 10, then 6% discount
    # if weight > 10, then 18% discount

def get_discounted_price(cart_weight, total_price, discount_type):
    discount = 0

    if discount_type == DiscountType.STANDARD:
        discount = 0.06  # 6% discount
    elif discount_type == DiscountType.SEASONAL:
        discount = 0.12  # 12% discount
    elif discount_type == DiscountType.WEIGHT:
        if cart_weight <= 10:
            discount = 0.06  # 6% discount
        else:
            discount = 0.18  # 18% discount
    
    # Apply the discount to the total price
    discounted_price = total_price * (1 - discount)
    
    return discounted_price

print(get_discounted_price(12, 100, DiscountType.WEIGHT))

82.0
