In [None]:
# -------------------------------
# 1️ Install Compatible Version
# -------------------------------
!pip install scikit-learn==1.0.2 joblib==1.1.0 --quiet

# -------------------------------
# 2️ Import Libraries
# -------------------------------
import pandas as pd
import numpy as np
import joblib
import tarfile
import sagemaker
from sklearn.ensemble import RandomForestClassifier
from sagemaker.sklearn.model import SKLearnModel
from sagemaker import get_execution_role
import boto3

# -------------------------------
# 3️ Initialize Session and Role
# -------------------------------
session = sagemaker.Session()
bucket = "mlpredictagri"   #  Your chosen bucket
role = get_execution_role()

# -------------------------------
# 4️ Generate a BALANCED Sample Dataset
# -------------------------------
data = []

#  Generate 500 irrigation-needed cases
for _ in range(500):
    temp = round(np.random.uniform(32, 40), 2)   # High temperature
    humidity = round(np.random.uniform(50, 90), 2)
    soil = round(np.random.uniform(5, 12), 2)    # Low soil moisture
    light = round(np.random.uniform(200, 800), 2)
    rain = 0                                     # No rain
    co2 = round(np.random.uniform(300, 500), 2)
    irrigation = 1
    data.append([temp, humidity, soil, light, rain, co2, irrigation])

#  Generate 500 no-irrigation-needed cases
for _ in range(500):
    temp = round(np.random.uniform(20, 40), 2)
    humidity = round(np.random.uniform(50, 90), 2)
    soil = round(np.random.uniform(15, 40), 2)   # Higher soil moisture
    light = round(np.random.uniform(200, 800), 2)
    rain = np.random.choice([0, 1])
    co2 = round(np.random.uniform(300, 500), 2)
    irrigation = 1 if soil < 15 and temp > 30 and rain == 0 else 0
    data.append([temp, humidity, soil, light, rain, co2, irrigation])

#  Create DataFrame
df = pd.DataFrame(data, columns=["temperature", "humidity", "soilMoisture", "light", "rain", "co2", "irrigation_needed"])
df.to_csv("sensor_training_data.csv", index=False)

# Split features and target
X = df[["temperature", "humidity", "soilMoisture", "light", "rain", "co2"]]
y = df["irrigation_needed"]

# -------------------------------
# 5️ Train RandomForest Model
# -------------------------------
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X, y)

# Save model
joblib.dump(model, "irrigation_model.joblib", protocol=2)
print(" Balanced Model trained and saved")

# -------------------------------
# 6️ Package Model (with predict.py)
# -------------------------------
!mkdir -p model
!cp irrigation_model.joblib model/
!cp predict.py model/
!tar -czf model.tar.gz -C model .

# Verify contents
!tar -tzf model.tar.gz

# -------------------------------
# 7️ Upload Model to S3
# -------------------------------
s3 = boto3.client('s3')
s3.upload_file("model.tar.gz", bucket, "models/model.tar.gz")
s3_path = f"s3://{bucket}/models/model.tar.gz"
print(" Model uploaded to:", s3_path)

# -------------------------------
# 8️ Deploy Model to Endpoint
# -------------------------------
model = SKLearnModel(
    model_data=s3_path,
    role=role,
    entry_point="predict.py",
    framework_version="1.0-1",
    py_version="py3"
)

predictor = model.deploy(
    instance_type="ml.t2.medium",
    initial_instance_count=1
)
print(" Balanced Model deployed successfully!")


 Balanced Model trained and saved
./
./predict.py
./irrigation_model.joblib
 Model uploaded to: s3://mlpredictagri/models/model.tar.gz
---

In [None]:
import boto3
import json

# Initialize SageMaker runtime client
runtime = boto3.client('sagemaker-runtime', region_name='us-east-1')

# Prepare test input (example sensor reading)
test_data = {
    "instances": [
        [35.5, 70.2, 12.0, 500, 0, 400]  
        # temperature, humidity, soilMoisture, light, rain, co2
    ]
}

# Invoke the endpoint
response = runtime.invoke_endpoint(
    EndpointName='sagemaker-scikit-learn-2025-08-04-13-14-15-963',
    ContentType='application/json',
    Body=json.dumps(test_data)
)

# Read and print the result
result = json.loads(response['Body'].read())
print("🔹 Prediction Result:", result)


In [None]:
import boto3
import json

# Initialize SageMaker runtime client
runtime = boto3.client('sagemaker-runtime', region_name='us-east-1')

# Prepare test input (example sensor reading)
test_data = {
    "instances": [
        [35.5, 70.2, 12.0, 500, 0, 400]  
        # temperature, humidity, soilMoisture, light, rain, co2
    ]
}

# Invoke the endpoint
response = runtime.invoke_endpoint(
    EndpointName='sagemaker-scikit-learn-2025-08-04-13-14-15-963',
    ContentType='application/json',
    Body=json.dumps(test_data)
)

# Read and print the result
result = json.loads(response['Body'].read())
print("🔹 Prediction Result:", result)


In [1]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X = df[["temperature", "humidity", "soilMoisture", "light", "rain", "co2"]]
y = df["irrigation_needed"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("✅ Accuracy:", accuracy_score(y_test, y_pred))

NameError: name 'df' is not defined

In [2]:
import pandas as pd

df = pd.read_csv("sensor_training_data.csv")


In [3]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

X = df[["temperature", "humidity", "soilMoisture", "light", "rain", "co2"]]
y = df["irrigation_needed"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("✅ Accuracy:", accuracy_score(y_test, y_pred))

NameError: name 'RandomForestClassifier' is not defined

In [4]:
# ✅ 1. Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# ✅ 2. Load dataset (or create synthetic if file not found)
try:
    df = pd.read_csv("sensor_training_data.csv")
    print("Dataset loaded from CSV")
except FileNotFoundError:
    print("CSV not found, generating synthetic dataset...")
    data = []
    for _ in range(500):
        temp = round(np.random.uniform(32, 40), 2)
        humidity = round(np.random.uniform(50, 90), 2)
        soil = round(np.random.uniform(5, 12), 2)
        light = round(np.random.uniform(200, 800), 2)
        rain = 0
        co2 = round(np.random.uniform(300, 500), 2)
        irrigation = 1
        data.append([temp, humidity, soil, light, rain, co2, irrigation])

    for _ in range(500):
        temp = round(np.random.uniform(20, 40), 2)
        humidity = round(np.random.uniform(50, 90), 2)
        soil = round(np.random.uniform(15, 40), 2)
        light = round(np.random.uniform(200, 800), 2)
        rain = np.random.choice([0, 1])
        co2 = round(np.random.uniform(300, 500), 2)
        irrigation = 1 if soil < 15 and temp > 30 and rain == 0 else 0
        data.append([temp, humidity, soil, light, rain, co2, irrigation])

    df = pd.DataFrame(data, columns=["temperature", "humidity", "soilMoisture", "light", "rain", "co2", "irrigation_needed"])

# ✅ 3. Prepare data
X = df[["temperature", "humidity", "soilMoisture", "light", "rain", "co2"]]
y = df["irrigation_needed"]

# ✅ 4. Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# ✅ 5. Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# ✅ 6. Predict and calculate accuracy
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("✅ Model Accuracy:", round(accuracy * 100, 2), "%")


Dataset loaded from CSV
✅ Model Accuracy: 100.0 %


In [6]:
# 1. Initialize git repo
!git init

# 2. Add your GitHub repo as a remote
!git remote add origin https://github.com/vivekananda4/finalthesis-.git

# 3. Add all files
!git add final_real_dataset.csv predict.py ml.ipynb

# 4. Commit
!git commit -m "Initial commit from Jupyter Notebook"

# 5. Set branch and push
!git branch -M main
!git push -u origin main


[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint:[m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint:[m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint:[m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /home/ec2-user/SageMaker/.git/
fatal: pathspec 'final_real_dataset.csv' did not match any files
On branch master

Initial commit

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31m.Trash-1000/[m
	[31m.ipynb_checkpoints/[m
	[31m.virtual_documents/[m
	[31mUntitled.ipynb[m
	[31mfinal_real_dataset_complete.csv[m
	[31mirrigation_model.joblib[m
	[31mml.ipynb[m
	[31mmodel.tar.gz[m
	[31mmodel/[m
	[31mpredict.py[m
	[31msensor_tra

In [7]:
# 1) (Optional) set 'main' as your default initial branch for future repos
git config --global init.defaultBranch main

# 2) Ensure you are in the right folder
cd /home/ec2-user/SageMaker

# 3) If you already ran git init, skip this; otherwise:
# git init

# 4) Create a .gitignore so junk doesn't get staged
cat > .gitignore <<'EOF'
.ipynb_checkpoints/
.Trash-1000/
.virtual_documents/
lost+found/
model/
*.tar.gz
*.joblib
EOF

# 5) Stage ONLY the files you want (adjust notebook names as needed)
git add final_real_dataset_complete.csv predict.py ml.ipynb Untitled.ipynb sensor_training_data.csv .gitignore

# 6) Commit
git commit -m "Add notebooks, dataset, and predict.py"

# 7) Rename local branch to main (so push matches)
git branch -M main

# 8) Add your GitHub remote (use your repo)
git remote remove origin 2>/dev/null || true
git remote add origin https://github.com/vivekananda4/finalthesis-.git

# 9) Push
git push -u origin main


SyntaxError: invalid syntax (2352784200.py, line 2)

In [None]:
!git config --global init.defaultBranch main
!cd /home/ec2-user/SageMaker

# Initialize repo if not already done
!git init

# Create .gitignore
!echo ".ipynb_checkpoints/\n.Trash-1000/\n.virtual_documents/\nlost+found/\nmodel/\n*.tar.gz\n*.joblib" > .gitignore

# Stage the right files
!git add final_real_dataset_complete.csv predict.py ml.ipynb Untitled.ipynb sensor_training_data.csv .gitignore

# Commit
!git commit -m "Add notebooks, dataset, and predict.py"

# Rename branch
!git branch -M main

# Link remote repo
!git remote remove origin || true
!git remote add origin https://github.com/vivekananda4/finalthesis-.git

# Push
!git push -u origin main


Reinitialized existing Git repository in /home/ec2-user/SageMaker/.git/
[main (root-commit) dd78636] Add notebooks, dataset, and predict.py
 Committer: EC2 Default User <ec2-user@ip-172-16-155-145.ec2.internal>
Your name and email address were configured automatically based
on your username and hostname. Please check that they are accurate.
You can suppress this message by setting them explicitly:

    git config --global user.name "Your Name"
    git config --global user.email you@example.com

After doing this, you may fix the identity used for this commit with:

    git commit --amend --reset-author

 6 files changed, 3042 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 Untitled.ipynb
 create mode 100644 final_real_dataset_complete.csv
 create mode 100644 ml.ipynb
 create mode 100644 predict.py
 create mode 100644 sensor_training_data.csv
Username for 'https://github.com/vivekananda4/finalthesis-.git': 

In [None]:
vivekananda4

In [None]:
!git push -u origin main
