In [None]:
pip install transformers

In [None]:
!pip install inflect

In [None]:
!pip install -U spacy


In [None]:
pip install accelerate -U


In [None]:
!pip install rouge-score
!pip install nltk


In [None]:
import re

def clean_conversations(input_file, output_file):
    """Extract single-quoted requirements from a conversation dump.

    Only lines of ``input_file`` that begin with ``",`` are inspected. Every
    single-quoted substring found on such a line is stripped of surrounding
    whitespace and of a leading ``<digits>.`` numbering prefix, then written
    to ``output_file`` on its own line. A confirmation message is printed
    when the output file has been written.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the text file to (over)write.
    """
    quoted_item = re.compile(r"'([^']*)'")
    numbering_prefix = re.compile(r'^\d+\.\s*')

    with open(input_file, 'r') as source:
        raw_lines = source.readlines()

    # Flatten the quoted items of every relevant line into one list.
    extracted = [
        item
        for raw in raw_lines
        if raw.startswith('",')
        for item in quoted_item.findall(raw)
    ]

    with open(output_file, 'w') as f_output:
        f_output.writelines(
            numbering_prefix.sub('', item.strip()) + '\n' for item in extracted
        )

    print(f"Il file è stato elaborato e salvato come {output_file}")

# File read by clean_conversations and file it writes the cleaned requirements to.
input_file = 'conversazioneRequisiti.txt'
output_file = 'outputRequisiti.txt'

# Run the conversation clean-up
clean_conversations(input_file, output_file)



Il file è stato elaborato e salvato come output.txt


In [None]:
# Load the list of requirements to analyze their parts of speech
import spacy
# spaCy pipeline called by analyze_sentences_from_file on every line of the file.
nlp = spacy.load("en_core_web_sm")

def analyze_sentences_from_file(file_path):
    """Print the dependency label and POS tag of every token, line by line.

    Each line of ``file_path`` is stripped and parsed with the module-level
    ``nlp`` pipeline. For every line a header with its 1-based number is
    printed, followed by one ``text --> dep --> pos`` row per token.

    Args:
        file_path: Path of the text file whose lines are analyzed.
    """
    with open(file_path, 'r') as handle:
        raw_sentences = handle.readlines()

    position = 0
    for raw in raw_sentences:
        position += 1
        cleaned = raw.strip()
        print(f"\nAnalyzing sentence {position}: '{cleaned}'")
        parsed = nlp(cleaned)

        print("POS tagging and Dependency Parsing:")
        for tok in parsed:
            print(tok.text, "-->", tok.dep_, "-->", tok.pos_)

# Analyze the requirements file (same path the clean-up cell uses as its output_file).
file_path = 'outputRequisiti.txt'
analyze_sentences_from_file(file_path)








In [None]:

import spacy
import inflect

# spaCy pipeline used by identify_entities_from_text to tag and parse the text.
nlp = spacy.load("en_core_web_sm")

# inflect engine used by to_singular to look up singular noun forms.
p = inflect.engine()

def to_singular(word):
    """Return the singular form of ``word`` as reported by the inflect engine ``p``.

    When ``p.singular_noun`` yields a falsy result, ``word`` is returned unchanged.
    """
    return p.singular_noun(word) or word

def identify_entities_from_text(file_path, output_file):
    """Extract the grammatical subjects of a text file and save them.

    The whole file is parsed in a single call to the module-level ``nlp``
    pipeline. Every token tagged ``NOUN`` or ``PROPN`` whose dependency label
    is ``nsubj`` is singularized with ``to_singular``, lower-cased and
    de-duplicated.

    Args:
        file_path: Path of the text file to analyze.
        output_file: Path of the file to (over)write with one entity per
            line, in order of first appearance in the text.
    """
    with open(file_path, 'r') as file:
        text = file.read()

    doc = nlp(text)

    # A dict is used as an insertion-ordered set so that the output file is
    # identical from run to run (iterating a set of strings is not).
    identified_entities = {}

    for token in doc:
        if token.pos_ in ("NOUN", "PROPN") and token.dep_ == "nsubj":
            entity = to_singular(token.text)
            identified_entities.setdefault(entity.lower(), None)

    with open(output_file, 'w') as out_file:
        for entity in identified_entities:
            out_file.write(entity + '\n')

# Esempio di utilizzo
# Example usage
file_path = 'outputRequisiti.txt'
output_file = 'outputWho.txt'
identify_entities_from_text(file_path, output_file)




In [None]:
import spacy
from transformers import pipeline

# NOTE(review): `nlp` is not referenced by the two functions defined in this cell.
nlp = spacy.load("en_core_web_sm")

def _get_qa_pipeline():
    """Return the shared question-answering pipeline, building it on first use."""
    cached = getattr(_get_qa_pipeline, "_instance", None)
    if cached is None:
        cached = pipeline("question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad")
        # Stored on the function object so later calls reuse the loaded model
        # instead of loading it again for every requirement.
        _get_qa_pipeline._instance = cached
    return cached


def extract_user_story_what(who, requirement):
    """Ask the QA model what ``who`` aims to do according to ``requirement``.

    Args:
        who: Name of the actor. A string is used as is; any other iterable
            (for example ``["user"]``) is joined with ``", "`` so that the
            question contains the plain name(s) rather than a list repr.
        requirement: Requirement text; it is used both inside the question
            and as the context the answer is extracted from.

    Returns:
        The ``'answer'`` field of the pipeline result.
    """
    who_text = who if isinstance(who, str) else ", ".join(str(item) for item in who)
    domanda = f"Given the requirement {requirement}, what does the {who_text} aim to do? If there is not the {who_text} or the aim, answer 'No what was identified in this requirement'"

    answer = _get_qa_pipeline()(question=domanda, context=requirement)

    return answer['answer']




def extract_user_story_what_from_file(who, file_path, output_file):
    """Run ``extract_user_story_what`` on every requirement of a file.

    Args:
        who: Actor passed unchanged to ``extract_user_story_what``.
        file_path: Path of the text file holding one requirement per line.
        output_file: Path of the file to (over)write with one extracted
            answer per non-blank requirement, in input order.
    """
    with open(file_path, "r") as file:
        requirements = [line.strip() for line in file]

    with open(output_file, "w") as out_file:
        for req in requirements:
            if not req:
                # A blank line would be sent as an empty context, which the
                # question-answering pipeline rejects; skip it instead.
                continue
            user_story_what = extract_user_story_what(who, req)
            out_file.write(f"{user_story_what}\n")

# Actor(s) whose goal is asked for in every requirement; the value is
# interpolated into the question built by extract_user_story_what.
who = ["user"]
input_file_path = "outputRequisiti.txt"
output_file_path = "outputWhat.txt"
extract_user_story_what_from_file(who, input_file_path, output_file_path)

In [None]:
import openpyxl

def read_excel(file_path):
    """Collect the truthy values at row positions 2 and 3 of every sheet.

    Rows are read with ``iter_rows(min_row=2, max_col=6, values_only=True)``,
    so the first row of each sheet is not visited. For each row the value at
    index 2 is taken before the value at index 3; falsy values are skipped.

    Args:
        file_path: Path of the workbook passed to ``openpyxl.load_workbook``.

    Returns:
        A flat list of the collected cell values, sheet by sheet, row by row.
    """
    wb = openpyxl.load_workbook(file_path)
    collected = []

    for name in wb.sheetnames:
        for cells in wb[name].iter_rows(min_row=2, max_col=6, values_only=True):
            collected.extend(cell for cell in (cells[2], cells[3]) if cell)

    wb.close()
    return collected

def write_to_file(values, output_file):
    """Write every item of ``values`` to ``output_file``, one per line.

    Items are formatted with an f-string, so non-string values are written
    using their ``str`` form. An existing file is overwritten.
    """
    with open(output_file, 'w') as sink:
        sink.writelines(f"{item}\n" for item in values)

# Workbook to read and text file that receives the collected cell values.
file_path = "WhoWhatFinale.xlsx"
output_file = "outputMetriche.txt"
cell_values = read_excel(file_path)

# One collected value per line.
write_to_file(cell_values, output_file)


In [None]:
import nltk
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def calculate_rouge(reference, prediction):
    """Score ``prediction`` against ``reference`` with ROUGE-1 and ROUGE-L.

    A ``RougeScorer`` with stemming enabled is created for the call and the
    result of its ``score(reference, prediction)`` is returned unchanged.
    """
    return rouge_scorer.RougeScorer(
        ['rouge1', 'rougeL'], use_stemmer=True
    ).score(reference, prediction)

def main(input_file):
    """Print ROUGE-1, ROUGE-L and BLEU scores for reference/prediction pairs.

    ``input_file`` is read as alternating lines: lines at even positions
    (0, 2, ...) are references and the line following each one is its
    prediction. For every pair the ROUGE-1 and ROUGE-L F-measures (from
    ``calculate_rouge``) and a smoothed sentence-level BLEU score are printed,
    followed by the averages over all pairs.

    If the file holds no complete pair, a message is printed and nothing is
    scored. If the number of lines is odd, the trailing reference has no
    prediction; it is ignored and a warning is printed.

    Args:
        input_file: Path of the text file holding the alternating lines.
    """
    # Make sure the tokenizer data needed by nltk.word_tokenize is available.
    try:
        nltk.data.find('tokenizers/punkt')
    except LookupError:
        nltk.download('punkt')

    with open(input_file, 'r') as file:
        lines = file.readlines()

    references = lines[::2]
    predictions = lines[1::2]

    if not predictions:
        # Without this guard the averages below would divide by zero.
        print(f"No reference/prediction pairs found in {input_file}")
        return

    if len(references) != len(predictions):
        print(f"Warning: {input_file} has an odd number of lines; the last reference has no prediction and is ignored")

    rouge1_scores = []
    rougeL_scores = []
    bleu_scores = []

    smoother = SmoothingFunction()

    # zip stops at the shorter list, so an unpaired trailing reference is skipped.
    for reference, prediction in zip(references, predictions):
        reference = reference.strip()
        prediction = prediction.strip()

        rouge_scores = calculate_rouge(reference, prediction)
        rouge1_score = rouge_scores['rouge1'].fmeasure
        rougeL_score = rouge_scores['rougeL'].fmeasure

        rouge1_scores.append(rouge1_score)
        rougeL_scores.append(rougeL_score)

        reference_tokens = nltk.word_tokenize(reference)
        prediction_tokens = nltk.word_tokenize(prediction)

        # BLEU score computed with smoothing
        bleu_score = sentence_bleu([reference_tokens], prediction_tokens, smoothing_function=smoother.method1)
        bleu_scores.append(bleu_score)

        print(f"Reference: {reference}")
        print(f"Prediction: {prediction}")
        print(f"ROUGE-1 F-measure: {rouge1_score}")
        print(f"ROUGE-L F-measure: {rougeL_score}")
        print(f"BLEU score: {bleu_score}")
        print()

    rouge1_avg = sum(rouge1_scores) / len(rouge1_scores)
    rougeL_avg = sum(rougeL_scores) / len(rougeL_scores)
    bleu_avg = sum(bleu_scores) / len(bleu_scores)

    print(f"Overall ROUGE-1 F-measure: {rouge1_avg}")
    print(f"Overall ROUGE-L F-measure: {rougeL_avg}")
    print(f"Overall BLEU score: {bleu_avg}")

# Score the alternating reference/prediction lines stored in q3.txt.
if __name__ == "__main__":
    input_file = "q3.txt"
    main(input_file)


In [None]:
# Extraction of the "who" and the "what"
import spacy
# NOTE(review): `wordnet` is not referenced by the functions defined in this cell.
from nltk.corpus import wordnet
import inflect
from transformers import pipeline

# spaCy pipeline used by identify_entities_from_text.
nlp_spacy = spacy.load("en_core_web_sm")
# inflect engine used by to_singular.
p = inflect.engine()

def to_singular(word):
    """Singularize ``word`` with the inflect engine ``p``.

    ``word`` itself is returned whenever ``p.singular_noun`` gives back a
    falsy value.
    """
    candidate = p.singular_noun(word)
    if not candidate:
        return word
    return candidate

def identify_entities_from_text(file_path):
    """Map every grammatical subject found in a text file to an empty list.

    The file is parsed in one call to ``nlp_spacy``. A token qualifies when it
    is tagged ``NOUN`` or ``PROPN`` and its dependency label is ``nsubj``. A
    qualifying token whose lower-cased text is already a key is skipped;
    otherwise its ``to_singular`` form, lower-cased, is stored as a key with a
    fresh empty list.

    Args:
        file_path: Path of the text file to analyze.

    Returns:
        A dict whose keys are the lower-cased entity names and whose values
        are empty lists.
    """
    with open(file_path, 'r') as handle:
        content = handle.read()

    subjects = {}
    for tok in nlp_spacy(content):
        if tok.dep_ != "nsubj" or tok.pos_ not in ("NOUN", "PROPN"):
            continue
        # Keys are always stored lower-cased, so a plain membership test is
        # enough to tell whether this surface form was already recorded.
        if tok.text.lower() in subjects:
            continue
        subjects[to_singular(tok.text).lower()] = []

    return subjects

def extract_user_story_what_from_file(file_path, output_file):
    """Ask the QA model what each detected actor aims to do in each requirement.

    The actors are the keys returned by ``identify_entities_from_text`` for
    ``file_path`` (the mapping is printed before the model is loaded). For
    every non-blank line of the file and every actor, the question-answering
    pipeline is queried with the line as context, and the extracted answer is
    appended to that actor's list.

    Args:
        file_path: Path of the text file holding one requirement per line.
        output_file: Path of the file to (over)write. For each actor it gets
            a ``<actor>:`` header, one answer per line, then a blank line.
    """
    who = identify_entities_from_text(file_path)
    print(who)

    # Built once and reused for every (requirement, actor) pair.
    qa_pipeline = pipeline("question-answering", model="bert-large-uncased-whole-word-masking-finetuned-squad")

    with open(file_path, "r") as file:
        requirements = [line.strip() for line in file]

    for req in requirements:
        if not req:
            # A blank line would be sent as an empty context, which the
            # question-answering pipeline rejects; skip it instead.
            continue
        for user in who:
            domanda = f"Given the requirement {req}, what does the {user} aim to do? If there is not the {user} or the aim, answer 'None'"
            answer = qa_pipeline(question=domanda, context=req)
            who[user].append(answer['answer'])

    with open(output_file, "w") as out_file:
        for entity, answers in who.items():
            out_file.write(f"{entity}:\n")
            for extracted in answers:
                out_file.write(f"{extracted}\n")
            out_file.write("\n")

# Requirements file to analyze and file that receives the per-actor answers.
file_path = 'requisitiFinali.txt'
output_file = 'output_what.txt'

extract_user_story_what_from_file(file_path, output_file)


{'system': [], 'user': [], 'team': [], 'ifa': [], 'guest': [], 'administrator': [], 'referee': [], 'fan': [], 'player': [], 'manager': [], 'owner': []}


Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
