1.a. BFS

In [None]:
from collections import deque

def bfs(graph, start):
    """Print the nodes reachable from ``start`` in breadth-first order.

    Each node is printed once, followed by a single space, in the order it
    is visited.

    Args:
        graph: Adjacency mapping of node -> iterable of neighbor nodes.  A
            node that has no entry of its own is treated as having no
            neighbors.
        start: Node the traversal begins at.
    """
    # Mark nodes when they are enqueued (not when they are dequeued) so that
    # no node ever sits in the queue more than once.
    visited = {start}
    queue = deque([start])

    while queue:
        node = queue.popleft()
        print(node, end=" ")

        try:
            neighbors = graph[node]
        except KeyError:
            # Leaf nodes are often omitted from adjacency lists.
            neighbors = ()

        for neighbor in neighbors:
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append(neighbor)

# Directed graph stored as an adjacency list: node label -> ordered successors
graph = {
    "5": ["3", "7"],
    "3": ["2", "4"],
    "7": ["8"],
    "2": [],
    "4": ["8"],
    "8": [],
}

# Demo run: traverse from node "5"; bfs prints every node as it visits it
print("Following is the Breadth-First Search")
bfs(graph, "5")


1.b. DFS

In [None]:
def dfs(graph, start, visited=None):
    """Print the nodes reachable from ``start`` in depth-first pre-order.

    Each node is printed once, followed by a single space.  The traversal
    keeps its own explicit stack instead of recursing, so a long chain of
    nodes cannot exhaust the interpreter's recursion limit.

    Args:
        graph: Adjacency mapping of node -> iterable of neighbor nodes.  A
            node that has no entry of its own is treated as having no
            neighbors.
        start: Node the traversal begins at; it is always printed.
        visited: Optional set of nodes to skip.  It is updated in place with
            every node this call visits.  A fresh set is used when omitted.
    """
    if visited is None:
        visited = set()

    def neighbors_of(node):
        # Leaf nodes are often omitted from adjacency lists.
        try:
            return iter(graph[node])
        except KeyError:
            return iter(())

    visited.add(start)
    print(start, end=' ')

    # Each stack entry is the partially consumed neighbor iterator of a node
    # on the current path, mirroring the frames of a recursive traversal.
    stack = [neighbors_of(start)]
    while stack:
        for neighbor in stack[-1]:
            if neighbor not in visited:
                visited.add(neighbor)
                print(neighbor, end=' ')
                stack.append(neighbors_of(neighbor))
                break  # descend into the neighbor before its siblings
        else:
            # Every neighbor of the node on top has been handled: backtrack.
            stack.pop()

# Directed graph stored as an adjacency list: node label -> ordered successors
graph = {
    "5": ["3", "7"],
    "3": ["2", "4"],
    "7": ["8"],
    "2": [],
    "4": ["8"],
    "8": [],
}

# Demo run: traverse from node "5"; dfs prints every node as it visits it
print("Following is the Depth-First Search")
dfs(graph, "5")


2. A* Search

In [None]:
import heapq

def heuristic(node, goal):
    """Estimate the remaining cost of travelling from ``node`` to ``goal``.

    This is a placeholder: both arguments are ignored and the estimate is
    always 0, i.e. no heuristic guidance is applied.  Swap in a
    problem-specific estimate (Manhattan distance, Euclidean distance, ...)
    when one is available.
    """
    return 0

def astar(graph, start, goal):
    """Search for a lowest-cost path from ``start`` to ``goal`` using A*.

    Nodes are expanded in order of ``g_cost + heuristic(node, goal)``.

    Args:
        graph: Mapping of node -> mapping of neighbor -> edge cost.  A node
            with no entry of its own is treated as having no outgoing edges.
        start: Node the search begins at.
        goal: Node the search is trying to reach.

    Returns:
        The path as a list of nodes ordered from ``start`` to ``goal``, or
        ``None`` when the goal is never reached.
    """
    # Open list: min-heap of (f_cost, node) entries waiting to be expanded.
    open_list = [(0, start)]
    # Closed list: nodes already expanded at their current best g_cost.
    closed_list = set()

    # Cheapest known cost from start to every node discovered so far; a node
    # that is absent has not been reached yet (cost of infinity).
    g_cost = {start: 0}
    unreached = float('inf')

    # parents[n] is the node through which the cheapest known path reaches n.
    parents = {}

    while open_list:
        _, current_node = heapq.heappop(open_list)

        # A node is pushed again each time a cheaper route to it is found, so
        # the heap can hold outdated entries.  Expanding one of those would
        # change nothing, so skip it.
        if current_node in closed_list:
            continue

        if current_node == goal:
            # Walk the parent links back to the start, then reverse.
            path = []
            while current_node in parents:
                path.append(current_node)
                current_node = parents[current_node]
            path.append(start)
            return path[::-1]

        closed_list.add(current_node)

        try:
            edges = graph[current_node]
        except KeyError:
            # Node appears only as somebody's neighbor: no outgoing edges.
            edges = {}

        for neighbor, edge_cost in edges.items():
            tentative_g_cost = g_cost[current_node] + edge_cost

            # Relax the edge only when it gives a strictly cheaper route.
            if tentative_g_cost < g_cost.get(neighbor, unreached):
                g_cost[neighbor] = tentative_g_cost
                parents[neighbor] = current_node
                # Reopen the neighbor in case it was expanded earlier at a
                # higher cost.
                closed_list.discard(neighbor)
                neighbor_f_cost = tentative_g_cost + heuristic(neighbor, goal)
                heapq.heappush(open_list, (neighbor_f_cost, neighbor))

    # Open list exhausted without reaching the goal.
    return None

# Weighted graph: node -> {neighbor: edge cost}
graph = {
    "A": {"B": 1, "C": 4},
    "B": {"A": 1, "C": 2, "D": 5},
    "C": {"A": 4, "B": 2, "D": 1},
    "D": {"B": 5, "C": 1},
}

# Demo run: search for a route from A to D and report the outcome
start_node, goal_node = "A", "D"
path = astar(graph, start_node, goal_node)

if not path:
    print("No path found.")
else:
    print("Path found:", " -> ".join(path))


5. Regression 

In [12]:
from sklearn.linear_model import LinearRegression
import pandas as pd
from sklearn.model_selection import train_test_split
# Download the housing table and discard every row that has a missing value
url = 'https://raw.githubusercontent.com/ageron/handson-ml/master/datasets/housing/housing.csv'
df = pd.read_csv(url).dropna(axis=0)

# One-hot encode the only categorical column so every feature is numeric
df = pd.get_dummies(df, columns=['ocean_proximity'])

# Hold out 20% of the rows for testing; median_house_value is the target
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='median_house_value'),
    df['median_house_value'],
    test_size=0.2,
    random_state=42,
)

# Fit ordinary least squares on the training split
model = LinearRegression()
model.fit(X_train, y_train)

# Report the model's score (R^2 for a regressor) on the held-out split
print(model.score(X_test, y_test))


0.6488402154431997


6. Decision Tree and Random Forest

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the online dataset (the file has no header row, so names are supplied)
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country', 'income']
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data',
    header=None,
    names=column_names,
    # Drop any blank that follows a delimiter so category and label strings
    # carry no leading whitespace.
    skipinitialspace=True,
)

# Split the dataset into features and target variable
X = df.drop('income', axis=1)
y = df['income']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define categorical features for one-hot encoding
categorical_features = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']

# Perform one-hot encoding; the remaining (numeric) columns pass through.
# The encoder is fitted on the training split only, so a category that occurs
# solely in the test split would make transform() raise.  With
# handle_unknown='ignore' such a value is encoded as all zeros instead.
ct = ColumnTransformer(
    [('onehot', OneHotEncoder(handle_unknown='ignore'), categorical_features)],
    remainder='passthrough',
)
X_train_encoded = ct.fit_transform(X_train)
X_test_encoded = ct.transform(X_test)

# Build a decision tree classifier
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train_encoded, y_train)

# Build a random forest classifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train_encoded, y_train)

# Evaluate both models on the held-out split
dt_accuracy = accuracy_score(y_test, dt_model.predict(X_test_encoded))
rf_accuracy = accuracy_score(y_test, rf_model.predict(X_test_encoded))

print('Decision Tree Accuracy:', dt_accuracy)
print('Random Forest Accuracy:', rf_accuracy)


7. SVM

In [None]:
# prompt: Build an SVM model using online dataset

import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

# Download the table; the file has no header row, so columns are numbered
df = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data',
    header=None,
)

# Column 1 is used as the label, columns 2 onward as the features
y = df.iloc[:, 1].values
X = df.iloc[:, 2:].values

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build a support vector classifier with default settings and train it
clf = SVC()
clf.fit(X_train, y_train)

# Report mean accuracy on the held-out split
score = clf.score(X_test, y_test)
print('Accuracy:', score)


8. Implementation of Ensembling Techniques

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.metrics import accuracy_score

# Iris features and integer class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Three base learners, all seeded so runs are repeatable
rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
ada_clf = AdaBoostClassifier(n_estimators=100, random_state=42)
gb_clf = GradientBoostingClassifier(n_estimators=100, random_state=42)

# Fit and score every base learner on its own
for name, clf in (
    ('Random Forest', rf_clf),
    ('AdaBoost', ada_clf),
    ('Gradient Boosting', gb_clf),
):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'{name} Accuracy:', accuracy)

# Combine the three learners with soft voting, then fit and score the ensemble
ensemble_clf = VotingClassifier(
    estimators=[('rf', rf_clf), ('ada', ada_clf), ('gb', gb_clf)],
    voting='soft',
)
ensemble_clf.fit(X_train, y_train)

y_pred = ensemble_clf.predict(X_test)
ensemble_accuracy = accuracy_score(y_test, y_pred)
print('Ensemble Accuracy:', ensemble_accuracy)


9. Various Clustering algorithms 

In [None]:


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Load the dataset from an online source (no header row in the file)
df = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data', header=None)

# The last column is the label, everything before it is a feature
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a list of classification algorithms.
# Every estimator that draws random numbers is seeded so the printed
# accuracies are the same on every run.  LogisticRegression is given more
# iterations than its default so the solver has room to converge on the
# unscaled features.
algorithms = [
    LogisticRegression(max_iter=1000),
    KNeighborsClassifier(),
    DecisionTreeClassifier(random_state=42),
    GaussianNB(),
    RandomForestClassifier(random_state=42),
    AdaBoostClassifier(random_state=42),
    GradientBoostingClassifier(random_state=42),
]

# Train and evaluate each algorithm on the same split
for algorithm in algorithms:
    model = algorithm.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{algorithm.__class__.__name__} accuracy: {accuracy}")


11. Simple NN Model

In [None]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Download the Iris table; the file has no header row, so names are supplied
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
column_names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
dataset = pd.read_csv(url, header=None, names=column_names)

# Replace the string class names with integer codes
encoder = LabelEncoder()
dataset['class'] = encoder.fit_transform(dataset['class'])

# Features are every column except the label
X = dataset.drop(columns='class')
y = dataset['class']

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Two hidden layers of 10 ReLU units, then a 3-way softmax output
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(10, activation='relu', input_shape=(4,)))
model.add(tf.keras.layers.Dense(10, activation='relu'))
model.add(tf.keras.layers.Dense(3, activation='softmax'))

# The sparse loss variant is used because the labels are integer codes
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 10 epochs
model.fit(X_train, y_train, epochs=10, verbose=2)

# Report loss and accuracy on the held-out split
loss, accuracy = model.evaluate(X_test, y_test)
print('Loss:', loss)
print('Accuracy:', accuracy)


12. Deep Learning NN Model

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

# Iris features and integer class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize using statistics from the training split only, then apply the
# same transform to the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Two 64-unit ReLU layers, each followed by 50% dropout for regularization,
# and a 3-way softmax output for multi-class classification
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(3, activation='softmax'))

# The sparse loss variant is used because the labels are integers
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train, keeping 10% of the training data aside for per-epoch validation
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.1,
    verbose=2,
)

# Report loss and accuracy on the held-out split as computed by Keras
loss, accuracy = model.evaluate(X_test, y_test)
print('Test Loss:', loss)
print('Test Accuracy:', accuracy)

# Cross-check: take the most probable class per sample and score with sklearn
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=-1)
print('Test Accuracy (using sklearn):', accuracy_score(y_test, y_pred_classes))


3. Implementation of Naive Bayes models

In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Iris features and integer class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for testing
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Gaussian Naive Bayes classifier, fitted on the training split
model = GaussianNB()
model.fit(X_train, y_train)

# Predict the held-out samples and report the fraction classified correctly
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)


In [13]:
import pandas as pd
from pgmpy.estimators import BayesianEstimator
from pgmpy.models import BayesianModel

# Load the dataset from an online source
# NOTE(review): the output recorded for this cell is
# "HTTPError: HTTP Error 404: Not Found", so this URL presumably no longer
# resolves and nothing below the read_csv call has run — confirm and
# point to a reachable copy of the data.
url = 'https://raw.githubusercontent.com/pgmpy/pgmpy/master/examples/datasets/cancer.csv'
df = pd.read_csv(url, delimiter=',')

# Create a Bayesian network model
# NOTE(review): newer pgmpy releases appear to expose this class under a
# different name (BayesianNetwork) — verify against the installed version.
model = BayesianModel()

# Define the structure of the model: four parent nodes, each with a single
# edge into 'cancer'
# NOTE(review): assumes the CSV has columns with exactly these names — TODO
# confirm once the dataset can be loaded.
model.add_nodes_from(['surgery', 'radiotherapy', 'chemotherapy', 'age', 'cancer'])
model.add_edges_from([
    ('surgery', 'cancer'),
    ('radiotherapy', 'cancer'),
    ('chemotherapy', 'cancer'),
    ('age', 'cancer')
])

# Fit the model to the data using the Bayesian parameter estimator
model.fit(df, estimator=BayesianEstimator, complete_samples_only=True)

# Print the learned CPDs (conditional probability distributions)
print(model.get_cpds())


HTTPError: HTTP Error 404: Not Found

4. Implement Bayesian Networks 

In [None]:
import numpy as np
import pandas as pd
import csv 
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

# Read the heart-disease table from a local file and turn every '?' cell
# into NaN
heartDisease = pd.read_csv('E:/Subjects/BE/CS3491_AI/7-dataset.csv')
heartDisease = heartDisease.replace('?', np.nan)

print('Sample instances from the dataset are given below')
print(heartDisease.head())

print('\n Attributes and datatypes')
print(heartDisease.dtypes)

# Network structure: four parent nodes point into 'heartdisease', which in
# turn points to 'restecg' and 'chol'
model = BayesianModel([
    ('age', 'heartdisease'),
    ('gender', 'heartdisease'),
    ('exang', 'heartdisease'),
    ('cp', 'heartdisease'),
    ('heartdisease', 'restecg'),
    ('heartdisease', 'chol'),
])

# Learn the conditional probability tables from the data
print('\nLearning CPD using Maximum likelihood estimators')
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

# Query the fitted network with variable elimination
print('\n Inferencing with Bayesian Network:')
HeartDiseasetest_infer = VariableElimination(model)

print('\n 1. Probability of HeartDisease given evidence= restecg')
q1 = HeartDiseasetest_infer.query(
    variables=['heartdisease'],
    evidence={'restecg': 1},
)
print(q1)

print('\n 2. Probability of HeartDisease given evidence= cp ')
q2 = HeartDiseasetest_infer.query(
    variables=['heartdisease'],
    evidence={'cp': 2},
)
print(q2)