In [1]:
import os
from getpass import getpass

import numpy as np
import pandas as pd

In [3]:
# Seed NumPy's global RNG so the random draws made in later cells are repeatable.
np.random.seed(123)

In [4]:
num_customers = 10

# Synthetic customer table. The NumPy draws are issued in column order, so the
# generated values depend on the state of the global RNG when this cell runs.
customer_columns = {
    'Customer_ID': ['C' + str(n) for n in range(1, num_customers + 1)],
    'Monthly_Calls': np.random.randint(100, 500, size=num_customers),
    'Monthly_SMS': np.random.randint(50, 200, size=num_customers),
    'Data_Usage_GB': np.random.uniform(5, 50, size=num_customers),
    'Average_Bill': np.random.uniform(30, 150, size=num_customers),
    # 0: Prepaid, 1: Postpaid
    'Subscription_Type': np.random.choice([0, 1], size=num_customers),
}
df = pd.DataFrame(customer_columns)

In [5]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

In [6]:
df.columns

Index(['Customer_ID', 'Monthly_Calls', 'Monthly_SMS', 'Data_Usage_GB',
       'Average_Bill', 'Subscription_Type'],
      dtype='object')

In [8]:
# Min-max scaler instance; it is fitted later through fit_transform.
scaler = MinMaxScaler()

In [7]:
# Columns passed to the scaler and, from there, to the similarity computation
# (every column of df except the customer ID).
features = [
    'Monthly_Calls',
    'Monthly_SMS',
    'Data_Usage_GB',
    'Average_Bill',
    'Subscription_Type',
]

In [9]:
# Fit the scaler on the selected feature columns and transform them in one step.
df_scaled = scaler.fit_transform(df[features])

### User Based Similarity

In [13]:
df

Unnamed: 0,Customer_ID,Monthly_Calls,Monthly_SMS,Data_Usage_GB,Average_Bill,Subscription_Type
0,C1,465,146,31.725608,80.065194,0
1,C2,482,163,33.430641,108.566557,0
2,C3,422,176,24.811573,74.856171,1
3,C4,198,97,8.767692,58.141545,1
4,C5,330,123,37.054858,148.559434,1
5,C6,117,82,24.253857,121.919514,1
6,C7,183,161,18.400122,123.240533,0
7,C8,206,133,27.143815,33.357835,0
8,C9,223,128,38.313338,50.868782,0
9,C10,157,146,21.097801,48.489869,0


In [None]:
# Pairwise cosine similarity between the rows of the scaled customer features.
similarity_matrix = cosine_similarity(df_scaled)

In [14]:
# Label both axes with the customer IDs so similarities can be looked up by ID.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=df['Customer_ID'],
    columns=df['Customer_ID'],
)

In [15]:
similarity_df

Customer_ID,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10
Customer_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
C1,1.0,0.990578,0.791131,0.263023,0.737213,0.35975,0.744924,0.861796,0.853168,0.791328
C2,0.990578,1.0,0.797532,0.278904,0.767233,0.406281,0.82623,0.844661,0.837867,0.817653
C3,0.791131,0.797532,1.0,0.760283,0.854556,0.646925,0.680275,0.712789,0.648396,0.725012
C4,0.263023,0.278904,0.760283,1.0,0.719406,0.805834,0.267947,0.154402,0.142612,0.186454
C5,0.737213,0.767233,0.854556,0.719406,1.0,0.895296,0.698753,0.609797,0.692542,0.588131
C6,0.35975,0.406281,0.646925,0.805834,0.895296,1.0,0.46791,0.277321,0.404355,0.286606
C7,0.744924,0.82623,0.680275,0.267947,0.698753,0.46791,1.0,0.677265,0.649108,0.843384
C8,0.861796,0.844661,0.712789,0.154402,0.609797,0.277321,0.677265,1.0,0.959429,0.932882
C9,0.853168,0.837867,0.648396,0.142612,0.692542,0.404355,0.649108,0.959429,1.0,0.846464
C10,0.791328,0.817653,0.725012,0.186454,0.588131,0.286606,0.843384,0.932882,0.846464,1.0


In [17]:
# Work on an explicit copy of the values. A plain to_numpy() may return a view of
# similarity_df's own data, in which case filling the diagonal would silently
# overwrite similarity_df as well; with pandas Copy-on-Write the returned array
# is read-only and fill_diagonal raises instead.
array = similarity_df.to_numpy(copy=True)
# Blank out self-similarity so that idxmax picks the closest *other* customer.
np.fill_diagonal(array, np.nan)
df_replaced = pd.DataFrame(array, index=similarity_df.index, columns=similarity_df.columns)

In [19]:
df_replaced

Customer_ID,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10
Customer_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
C1,,0.990578,0.791131,0.263023,0.737213,0.35975,0.744924,0.861796,0.853168,0.791328
C2,0.990578,,0.797532,0.278904,0.767233,0.406281,0.82623,0.844661,0.837867,0.817653
C3,0.791131,0.797532,,0.760283,0.854556,0.646925,0.680275,0.712789,0.648396,0.725012
C4,0.263023,0.278904,0.760283,,0.719406,0.805834,0.267947,0.154402,0.142612,0.186454
C5,0.737213,0.767233,0.854556,0.719406,,0.895296,0.698753,0.609797,0.692542,0.588131
C6,0.35975,0.406281,0.646925,0.805834,0.895296,,0.46791,0.277321,0.404355,0.286606
C7,0.744924,0.82623,0.680275,0.267947,0.698753,0.46791,,0.677265,0.649108,0.843384
C8,0.861796,0.844661,0.712789,0.154402,0.609797,0.277321,0.677265,,0.959429,0.932882
C9,0.853168,0.837867,0.648396,0.142612,0.692542,0.404355,0.649108,0.959429,,0.846464
C10,0.791328,0.817653,0.725012,0.186454,0.588131,0.286606,0.843384,0.932882,0.846464,


In [20]:
df_replaced.idxmax(axis=1).head()

Customer_ID
C1    C2
C2    C1
C3    C5
C4    C6
C5    C6
dtype: object

### Item Based Similarity

In [21]:
# Offer catalogue, one tuple per offer:
# (Offer_ID, Discount [percentage], Extra_GB [extra data provided], Free_SMS, Monthly_Cost)
offer_rows = [
    ('O1', 5, 2, 50, 10),
    ('O2', 10, 5, 100, 20),
    ('O3', 15, 10, 200, 30),
    ('O4', 20, 15, 500, 50),
]
offers = pd.DataFrame(
    offer_rows,
    columns=['Offer_ID', 'Discount', 'Extra_GB', 'Free_SMS', 'Monthly_Cost'],
)

In [24]:
# Use a dedicated scaler for the offer attributes. Refitting the shared `scaler`
# here would discard the min/max it learned from the customer features, so any
# later scaler.transform(...) on customer data would silently use offer ranges.
offers_scaler = MinMaxScaler()
offers_scaled = offers_scaler.fit_transform(offers.drop(columns=["Offer_ID"]))

In [26]:
# Pairwise cosine similarity between the rows of the scaled offer attributes.
offers_similarity = cosine_similarity(offers_scaled)

In [31]:
# NaN the diagonal (in place) so an offer is never reported as its own nearest
# neighbour, then label rows and columns with the offer IDs.
np.fill_diagonal(offers_similarity, np.nan)
offers_similarity_df = pd.DataFrame(
    data=offers_similarity,
    index=offers['Offer_ID'],
    columns=offers['Offer_ID'],
)

In [32]:
offers_similarity_df.idxmax(axis=1).head()

Offer_ID
O1    O2
O2    O3
O3    O2
O4    O3
dtype: object

In [30]:
type(offers_similarity)

numpy.ndarray

### Factor decomposition

In [33]:
from sklearn.decomposition import TruncatedSVD

In [34]:
# Random 0/1 customer-by-offer matrix: one row per customer, one column per offer.
interaction_matrix = np.random.randint(low=0, high=2, size=(num_customers, len(offers)))

In [35]:
interaction_matrix

array([[1, 1, 0, 0],
       [0, 0, 1, 0],
       [1, 1, 0, 1],
       [1, 1, 0, 1],
       [1, 1, 0, 0],
       [0, 1, 0, 0],
       [1, 1, 1, 0],
       [1, 1, 0, 0],
       [1, 1, 0, 1],
       [1, 1, 1, 1]])

In [39]:
# Pin random_state: TruncatedSVD's default solver is randomized, and without a
# seed it draws from NumPy's global RNG, so the factors would depend on how many
# random draws earlier (possibly re-run) cells have consumed.
svd = TruncatedSVD(n_components=2, random_state=123)
# Rows of user_factors are the customers projected onto the 2 latent components.
user_factors = svd.fit_transform(interaction_matrix)
# components_ holds the per-offer loadings of those same components.
item_factors = svd.components_


In [40]:
# Rank-2 reconstruction of the interaction matrix: customer factors times offer factors.
predicted_matrix = user_factors @ item_factors

In [41]:
predicted_matrix

array([[ 0.8313468 ,  0.88420613,  0.06355284,  0.48842193],
       [ 0.05058748,  0.01296536,  0.98546534, -0.10768572],
       [ 1.06550123,  1.13847364, -0.04413289,  0.64356903],
       [ 1.06550123,  1.13847364, -0.04413289,  0.64356903],
       [ 0.8313468 ,  0.88420613,  0.06355284,  0.48842193],
       [ 0.42808268,  0.45612345,  0.01296536,  0.25426751],
       [ 0.88193428,  0.89717148,  1.04901817,  0.38073621],
       [ 0.8313468 ,  0.88420613,  0.06355284,  0.48842193],
       [ 1.06550123,  1.13847364, -0.04413289,  0.64356903],
       [ 1.11608871,  1.15143899,  0.94133245,  0.53588331]])

In [55]:
# Label the reconstructed scores: rows by customer ID, columns by offer ID.
predicted_df = pd.DataFrame(predicted_matrix, index = df['Customer_ID'], columns= offers['Offer_ID'])

In [51]:
from langchain_groq import ChatGroq

In [52]:
# Never store the credential in the notebook: read it from the GROQ_API_KEY
# environment variable and fall back to an interactive, non-echoing prompt.
api_key = os.environ.get("GROQ_API_KEY") or getpass("Groq API key: ")
llm = ChatGroq(groq_api_key=api_key, model_name="Gemma2-9b-It")

In [59]:
def generate_offer(customer):
    """Ask the LLM for a personalized offer for one customer.

    Relies on the notebook-level objects ``similarity_df``, ``predicted_df``,
    ``df`` and ``llm``.

    Parameters
    ----------
    customer : str
        A ``Customer_ID`` value present in ``df``, ``similarity_df`` and
        ``predicted_df``.

    Returns
    -------
    str
        The text of the model's reply, or ``"Error generating offer: ..."``
        when the LLM call fails (the failure is reported, not raised, so a
        single bad request does not abort a whole ``apply`` run).
    """
    # Drop the customer's own entry before taking the maximum: self-similarity
    # is 1.0, so without this the "most similar" customer would be the customer
    # itself whenever the diagonal has not been blanked out.
    similar_customer = similarity_df[customer].drop(labels=customer).idxmax()
    best_offer = predicted_df.loc[customer].idxmax()
    customer_data = df[df['Customer_ID'] == customer].iloc[0]

    prompt = f"""
    Customer {customer} has a usage pattern similar to {similar_customer}.
    They make {customer_data['Monthly_Calls']} calls, send {customer_data['Monthly_SMS']} SMS, 
    use {round(customer_data['Data_Usage_GB'], 2)} GB of data per month, 
    and pay an average bill of ${round(customer_data['Average_Bill'], 2)}.
    Their subscription type is {"Postpaid" if customer_data['Subscription_Type'] == 1 else "Prepaid"}.

    Based on past telecom offers, we recommend {best_offer}.

    Suggest a personalized offer using historical data and behavioral patterns.


    """
    try:
        # invoke() accepts a plain string prompt; calling the chat model object
        # directly is deprecated and expects a list of messages.
        offer = llm.invoke(prompt).content
    except Exception as e:
        offer = f"Error generating offer: {e}"

    return offer

In [None]:
# One generate_offer call per customer ID; the results are stored as a new column on df.
df['Recommended_Offer'] = df['Customer_ID'].map(generate_offer)