In [36]:

from flask import Flask, request, jsonify


In [37]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import nltk
from nltk.corpus import stopwords
import re
import pickle



In [38]:

nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [39]:

stop_words = set(stopwords.words('english'))

In [40]:
app = Flask(__name__)


In [41]:

def preprocess_text(text):
    text = re.sub(r'\W', ' ', text)
    text = text.lower()
    text = [word for word in text.split() if word not in stop_words]
    return ' '.join(text)

In [42]:
df = pd.read_csv('large_ai_human_generated_text_dataset_fixed (1).csv')

In [43]:
df['cleaned_paragraph'] = df['Paragraph'].apply(preprocess_text)

In [44]:
x = df['cleaned_paragraph']
y = df['Label']

In [45]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)


In [46]:
vectorizer = TfidfVectorizer(max_features=5000)
x_train_tfidf = vectorizer.fit_transform(x_train)
x_test_tfidf = vectorizer.transform(x_test)


In [47]:
model = LogisticRegression()
model.fit(x_train_tfidf, y_train)
y_pred = model.predict(x_test_tfidf)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        90
           1       1.00      1.00      1.00       110

    accuracy                           1.00       200
   macro avg       1.00      1.00      1.00       200
weighted avg       1.00      1.00      1.00       200



In [48]:
import pickle

# Save model
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)


In [None]:
# save vectorizer
with open('vectorizer.pkl', 'wb') as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)

In [49]:
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('vectorizer.pkl', 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)


In [50]:
def predict_paragraph(text):
    processed_text = preprocess_text(text)
    vectorized_text = vectorizer.transform([processed_text])
    prediction = model.predict(vectorized_text)
    return "AI-generated" if prediction == 1 else "Human-generated"


AI-generated


In [None]:

test_text = "Artificial intelligence has greatly improved automation capabilities."
print(predict_paragraph(test_text))


In [51]:
test_text = "GPT-3 has been instrumental in natural language generation tasks."
print(predict_paragraph(test_text))


AI-generated


In [52]:
test_text_1=("The sun was setting over the mountains, casting a golden glow over the valley below Birds were chirping as the cool evening breeze gently swayed the trees")
print(predict_paragraph(test_text_1))

Human-generated


In [54]:
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route('/predict', methods=['POST'])
def predict():
    text = request.json['text']
    prediction = predict_paragraph(text)
    return jsonify({'prediction': prediction})

if __name__ == '__main__':
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with watchdog (windowsapi)


SystemExit: 1

In [None]:

def get_input_and_predict():
  text = input("Enter the text: ")
  prediction = predict_paragraph(text)
  print(f"Prediction: {prediction}")

In [None]:
get_input_and_predict()

Prediction: Human-generated
