### Delete previously generated files if they already exist

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
def delfile(file):
    """Remove ``file`` from disk, silently doing nothing if that fails.

    Any ``OSError`` raised by the removal (for example because the path does
    not exist) is suppressed, so calling this on a missing file is harmless.
    """
    import contextlib
    import os

    with contextlib.suppress(OSError):
        os.remove(file)

In [5]:
# These four files are opened in append mode further down, so leftovers from
# an earlier run would otherwise be duplicated.
for stale_file in ("intent.md", "query.md", "agri.yml", "config.yml"):
    delfile(stale_file)

### Load data from Indian Government's Agriculture API

#### Note: You need to create a login at data.gov.in and generate an API key

In [6]:
import os

import requests

# data.gov.in API key. Prefer the DATA_GOV_IN_API_KEY environment variable so
# the credential does not have to live in the notebook; the literal is kept
# only as a fallback so existing runs keep working.
api_key = os.environ.get(
    "DATA_GOV_IN_API_KEY",
    r"579b464db66ec23bdd000001dbba9a854ca84a9b6d93cbcc5f51d998",
)
outputformat = r"json"
records = 5000  # number of records requested from the API

request = (
    r'https://api.data.gov.in/catalog/19ba71d9-6d58-402d-9b75-a0ebdc034a56?api-key='
    + api_key + '&format=' + outputformat + '&limit=' + str(records)
)
# A timeout keeps the cell from hanging forever on an unresponsive server.
response = requests.get(request, timeout=60)
# Fail loudly on an HTTP error instead of trying to parse an error page.
response.raise_for_status()

# Parse the body as JSON. The previous eval(response.text) executed whatever
# the server returned as Python code and broke on JSON-only literals such as
# true / false / null.
data = response.json()

### Verify the data loaded

In [7]:
# Show the last record returned. A negative index keeps this working when the
# API returns fewer rows than the number requested.
data['records'][-1]

{'Season': 'NA',
 'Sector': 'AGRICULTURE',
 'Category': 'Others',
 'Crop': 'Others',
 'QueryType': 'Government Schemes',
 'QueryText': 'ask about general information',
 'KccAns': 'all information provided - thanks for calling in kisan call centre',
 'StateName': 'GUJARAT',
 'DistrictName': 'BANAS KANTHA',
 'BlockName': 'VAV',
 'CreatedOn': '2018-05-01T17:50:06.15'}

### Extract required data into lists

In [8]:
# Only the first `sample_size` records are used to build the training data.
# Slicing (rather than indexing 0..999) keeps this cell working when the API
# returned fewer records than that.
sample_size = 1000
sampled_records = data['records'][:sample_size]

category = [record['Category'] for record in sampled_records]
crop = [record['Crop'] for record in sampled_records]
querytype = [record['QueryType'] for record in sampled_records]
querytext = [record['QueryText'] for record in sampled_records]
kccans = [record['KccAns'] for record in sampled_records]
# Running row number, one per sampled record.
identifier = list(range(len(sampled_records)))
    

### Create a dataframe and load the above lists into dataframe columns

In [9]:
import pandas as pd
# Start with an empty frame; its columns are filled in by the next cell.
df = pd.DataFrame()

In [10]:
# Attach the extracted lists as columns, in the same order as before.
df = df.assign(
    category=category,
    crop=crop,
    querytype=querytype,
    querytext=querytext,
    kccans=kccans,
    identifier=identifier,
)

### Preprocess each column to prepare data for intent and actions 

In [11]:
# The raw intent label is category + crop + query type, joined with no separator.
category_and_crop = df["category"] + df["crop"]
df["intent"] = category_and_crop + df["querytype"]

#### Function to remove special characters from the intent column

In [12]:
import re
def cleanString(x):
    """Return ``x`` with every character outside A-Z, a-z and 0-9 removed."""
    non_alphanumeric = re.compile('[^A-Za-z0-9]+')
    return non_alphanumeric.sub('', x)

In [13]:
# Normalise each intent label. Mapping over the single column avoids building
# a row object per record, which is what DataFrame.apply(axis=1) does.
df["intent"] = df["intent"].map(lambda label: cleanString(str(label)))

In [14]:
# Heading line that introduces each intent in intent.md.
intent_heading = "## intent:"
df["intent_md"] = intent_heading + df["intent"]

In [15]:
# "* <intent>" line, written to query.md further down.
star_bullet = "* "
df["intent_*"] = star_bullet + df["intent"]

In [16]:
# "- <intent>" list item, written under "intents:" in agri.yml further down.
dash_bullet = "- "
df["intent_-"] = dash_bullet + df["intent"]

In [17]:
# "- <question>" example line, written under its intent heading in intent.md.
example_bullet = "- "
df["querytext_md"] = example_bullet + df["querytext"]

In [18]:
def _join_lines(values):
    """Concatenate the given strings, one per line."""
    return '\n'.join(values)


# One row per intent heading, with all of its example lines merged into a
# single newline-separated string.
df_pivot = df.pivot_table(
    index=['intent_md'],
    values='querytext_md',
    aggfunc=_join_lines,
).reset_index()

In [19]:
# Write the NLU training file: each intent heading followed by its examples.
# The file is opened once instead of once per intent, and explicitly as UTF-8
# so that non-ASCII query text does not depend on the platform's default
# encoding. The `with` block closes the file, so no explicit close() is needed.
with open('intent.md', 'a', encoding='utf-8') as f:
    for i, j in zip(df_pivot["intent_md"], df_pivot["querytext_md"]):
        print(i, '\n', j, file=f)
    

In [20]:
# Action name for each intent: the intent label prefixed with "utter_".
action_prefix = "utter_"
df["actions_md"] = action_prefix + df["intent"]

In [21]:
# Heading line for each entry written to query.md.
query_heading = "## query_"
df["query_md"] = query_heading + df["intent"]

In [22]:
# "- utter_<intent>" list item, used in query.md and under "actions:" in agri.yml.
action_bullet = "- "
df["actions_-"] = action_bullet + df["actions_md"]

In [23]:
# "utter_<intent>:" key line, written under "templates:" in agri.yml.
key_suffix = ":"
df["actions_:"] = df["actions_md"] + key_suffix

#### Function to clean up answers

In [24]:
def cleanAnswer(x):
    """Return ``x`` with each run of non-alphanumeric characters replaced by one space."""
    # Splitting on the runs and re-joining with a space is equivalent to
    # substituting a single space for every run.
    alphanumeric_pieces = re.split('[^A-Za-z0-9]+', x)
    return ' '.join(alphanumeric_pieces)

In [25]:
# Drop literal backslash-n sequences embedded in the answers, then replace
# every remaining run of non-alphanumeric characters with a single space.
# Mapping over the column avoids the per-row overhead of DataFrame.apply(axis=1).
df["kccans"] = df["kccans"].replace(r'\\n','', regex=True)
df["kccans"] = df["kccans"].map(lambda answer: cleanAnswer(str(answer)))

In [26]:
# Wrap each cleaned answer as a quoted `- text: "..."` template line.
template_open = '- text: "'
template_close = '"'
df["kccans_text"] = template_open + df["kccans"] + template_close

In [27]:
# One answer template per distinct action key; drop_duplicates keeps the first
# occurrence by default.
action_answer_pairs = df[["actions_:", "kccans_text"]]
df2 = action_answer_pairs.drop_duplicates(subset=["actions_:"])

In [28]:
# One query/intent/action triple per distinct intent (first occurrence kept).
query_triples = df[["query_md", "intent_*", "actions_-"]]
df3 = query_triples.drop_duplicates(subset=["intent_*"])

### Create query.md to define intent and actions

In [29]:
# Write one block per intent: query heading, intent line, action line.
# The file is opened once instead of once per row; the `with` block closes it,
# so no explicit close() is needed. The print arguments are unchanged so the
# emitted text is identical.
with open('query.md', 'a') as f:
    for i, j, k in zip(df3["query_md"], df3["intent_*"], df3["actions_-"]):
        print(i, '\n', j, '\n',' ', k, '\n', '\n', file = f)
    

### Create agri.yml to capture intent, actions and templates for answers

In [30]:
# Open the "intents:" section of agri.yml.
with open('agri.yml', 'a') as domain_file:
    print('intents:', '\n', file=domain_file)

In [31]:
# List every distinct intent under "intents:". The file is opened once instead
# of once per intent; the `with` block closes it.
with open('agri.yml', 'a') as f:
    for i in df["intent_-"].unique():
        print(i, '\n', file = f)

In [32]:
# Fixed slots/entities section of agri.yml. Each entry is passed to print(),
# which appends one more newline after it.
slot_entity_lines = (
    "\n",
    "slots:\n",
    "   group:\n",
    "     type: text\n",
    "\n",
    "entities:\n",
    "- group\n",
    "\n",
)
with open('agri.yml', 'a') as domain_file:
    for section_line in slot_entity_lines:
        print(section_line, file=domain_file)

In [33]:
# Open the "actions:" section of agri.yml.
with open('agri.yml', 'a') as domain_file:
    print('actions:', '\n', file=domain_file)

In [34]:
# List every distinct action under "actions:". The file is opened once instead
# of once per action; the `with` block closes it.
with open('agri.yml', 'a') as f:
    for i in df["actions_-"].unique():
        print(i, '\n', file = f)

In [35]:
# Open the "templates:" section of agri.yml.
with open('agri.yml', 'a') as domain_file:
    print('templates:', '\n', file=domain_file)

In [36]:
# Under "templates:", write each action key followed by its answer text.
# The file is opened once instead of once per template; the `with` block
# closes it, so no explicit close() is needed.
with open('agri.yml', 'a') as f:
    for i, j in zip(df2["actions_:"], df2["kccans_text"]):
        print(' ',i, '\n', file = f)
        print(' ',j, '\n', file = f)

In [37]:
import rasa_nlu
import rasa_core
import spacy

In [38]:
# NLU pipeline definition, appended verbatim to config.yml.
# NOTE(review): a later cell imports `config` from rasa_nlu, which rebinds this
# name; re-running this cell afterwards would shadow that module again.
config = """
language: "en"

pipeline:
- name: "nlp_spacy"                   
- name: "tokenizer_spacy"            
- name: "ner_crf"                 
- name: "intent_featurizer_spacy"    
- name: "intent_classifier_sklearn" 
- name: "ner_synonyms"                
"""
with open('config.yml', 'a') as config_file:
    print(config, file=config_file)

In [40]:
from rasa_nlu.training_data import load_data
from rasa_nlu.config import RasaNLUModelConfig
from rasa_nlu.model import Trainer
from rasa_nlu import config

# Load the NLU training examples from intent.md, which was generated earlier
# in this notebook
training_data = load_data("intent.md")

# Build a trainer from the pipeline configuration stored in config.yml
trainer = Trainer(config.load("config.yml"))

# Train on the loaded examples; the result is used below to parse questions
interpreter = trainer.train(training_data)

# Persist the trained model; the value returned by persist() is reused by the
# evaluation and agent-loading cells further down
model_directory = trainer.persist("./models/nlu")

Fitting 2 folds for each of 6 candidates, totalling 12 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done  12 out of  12 | elapsed:   15.3s finished


In [41]:
# Verify your question's confidence level
import json
def pprint(o):
    """Print ``o`` serialised as JSON with a two-space indent."""
    rendered = json.dumps(o, indent=2)
    print(rendered)
    
# Parse one sample question and show the ranked intents with their confidences.
sample_parse = interpreter.parse("insect in cotton?")
pprint(sample_parse)

{
  "intent": {
    "name": "FiberCropsCottonKapasPlantProtection",
    "confidence": 0.035218651357052086
  },
  "entities": [],
  "intent_ranking": [
    {
      "name": "FiberCropsCottonKapasPlantProtection",
      "confidence": 0.035218651357052086
    },
    {
      "name": "FiberCropsCottonKapasWeather",
      "confidence": 0.026391299062884507
    },
    {
      "name": "OthersOthersWeather",
      "confidence": 0.02567710942123305
    },
    {
      "name": "FruitsMangoPlantProtection",
      "confidence": 0.02522270149631183
    },
    {
      "name": "FiberCropsCottonKapasFertilizerUseandAvailability",
      "confidence": 0.023540140567565426
    },
    {
      "name": "VegetablesBhindiOkraLadysfingerPlantProtection",
      "confidence": 0.022249661565728537
    },
    {
      "name": "CerealsPaddyDhanPlantProtection",
      "confidence": 0.020199616710339936
    },
    {
      "name": "FiberCropsCottonKapasVarieties",
      "confidence": 0.01847487784535647
    },
    {
    

### Evaluate Model

In [42]:
from rasa_nlu.test import run_evaluation

# NOTE(review): this scores the model on intent.md, the same file it was
# trained on above, so the reported figures are not a held-out estimate.
run_evaluation("intent.md", model_directory)

100%|█████████████████████████████████████████████████████████████████████████████| 1000/1000 [00:06<00:00, 157.94it/s]


{'intent_evaluation': {'predictions': [{'text': 'Ask about Gernal information',
    'intent': 'AnimalBovineCowBuffaloAnimalBreeding',
    'predicted': 'OthersOthersCulturalPractices',
    'confidence': 0.09341556861142038},
   {'text': 'bakri ke muh me sujan ho gai hai kya kare',
    'intent': 'AnimalBovineCowBuffaloDiseaseManagement',
    'predicted': 'OthersOthersCulturalPractices',
    'confidence': 0.08146350310970434},
   {'text': 'DISEASE MANAGEMENT IN BUFFALO',
    'intent': 'AnimalBovineCowBuffaloDiseaseManagement',
    'predicted': 'OthersOthersPlantProtection',
    'confidence': 0.03057552536156595},
   {'text': 'Animal Husbandry Toll Free No.?',
    'intent': 'AnimalBovineCowBuffaloLivestockProductsProcessingandPackaging',
    'predicted': 'OthersOthersWeather',
    'confidence': 0.176755950796594},
   {'text': 'POULTRY FARM INFORMATION',
    'intent': 'AnimalPOULTRYFARMLivestockProductsProcessingandPackaging',
    'predicted': 'OthersOthersWeather',
    'confidence': 0.0589

In [None]:
from rasa_core.actions import Action
from rasa_core.events import SlotSet
from rasa_core.policies import FallbackPolicy, KerasPolicy, MemoizationPolicy
from rasa_core.agent import Agent

# Use a Fallback action in case the model is unable to understand the question.
# NOTE(review): the fallback action name is built from data-derived intent
# labels ("utter_" + intent); presumably it must be listed under "actions:" in
# agri.yml, which only happens if that intent occurs in the sampled records —
# verify.
fallback = FallbackPolicy(fallback_action_name="utter_OthersOthersGovernmentSchemes",
                          core_threshold=0.3,
                          nlu_threshold=0.3)

# Build the dialogue agent from the generated agri.yml and the three policies
agent = Agent('agri.yml', policies=[MemoizationPolicy(), KerasPolicy(), fallback])

# Load query definitions from the generated query.md.
# Note: this rebinds `training_data`, which previously held the NLU examples.
training_data = agent.load_data('query.md')

# Train for 50 epochs, with validation_split set to 0.2
agent.train(
    training_data,epochs=50,
    validation_split=0.2
)

# Save the trained dialogue model; it is reloaded from this path below
agent.persist('models/dialogue')

Processed Story Blocks: 100%|████████████████████████████████████████| 189/189 [00:00<00:00, 1181.24it/s, # trackers=1]
Processed Story Blocks: 100%|████████████████████████████████████████| 189/189 [00:00<00:00, 453.23it/s, # trackers=20]
Processed Story Blocks: 100%|████████████████████████████████████████| 189/189 [00:00<00:00, 292.30it/s, # trackers=20]
Processed Story Blocks: 100%|████████████████████████████████████████| 189/189 [00:00<00:00, 386.94it/s, # trackers=20]
Processed trackers:   4%|█▌                                       | 438/11340 [00:02<00:49, 218.90it/s, # actions=1001]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Processed trackers:  10%|████                                    | 1157/1134

Processed actions: 10612it [00:07, 1381.57it/s, # examples=10701]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Processed actions: 14894it [00:10, 1334.82it/s, # examples=14984]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

Processed actions: 19163it [00:14, 1272.65it/s, # examples=19213]IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, se

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
masking_3 (Masking)          (None, 5, 382)            0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 32)                53120     
_________________________________________________________________
dense_3 (Dense)              (None, 191)               6303      
_________________________________________________________________
activation_3 (Activation)    (None, 191)               0         
Total params: 59,423
Trainable params: 59,423
Non-trainable params: 0
_________________________________________________________________
Train on 18144 samples, validate on 4537 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Ep

### Start Agricultural chatbot

In [44]:
from rasa_core.agent import Agent
# Reload the persisted dialogue model, passing the saved NLU model directory
# as its interpreter.
agent = Agent.load('models/dialogue', interpreter=model_directory)

In [45]:
# Simple console chat: read a question, print every text reply from the agent,
# and repeat until the user types "stop".
print("Type your question here...")
while True:
    try:
        a = input()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: end the chat
        # cleanly instead of surfacing a traceback.
        break
    if a == 'stop':
        break
    responses = agent.handle_message(a)
    for response in responses:
        # Skip replies that carry no "text" entry rather than raising KeyError.
        text = response.get("text")
        if text is not None:
            print(text)

Type your question here...
insect in kapas
WEATHER REPORT DRY WEATHER NO CHANCE OF RAINFALL IN YOUR AREA
hello
WEATHER REPORT DRY WEATHER NO CHANCE OF RAINFALL IN YOUR AREA
hi
WEATHER REPORT DRY WEATHER NO CHANCE OF RAINFALL IN YOUR AREA
cotton
contact agriculture officer
weather
insect
contact agriculture officer
insect in cotton
contact agriculture officer
stop


### Next steps
The model currently has low accuracy. It should be improved by increasing the number of training questions and answers and by tuning the model's hyperparameters.