In [1]:
# Model training of text classiffiers using logreg, svm, mlp 
# Violeta Berdejo-Espinola
# November 2024

In [2]:
%pip install mpu scikit-learn imblearn embetter sentence_transformers matplotlib ipywidgets

Defaulting to user installation because normal site-packages is not writeable

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


# read data

In [3]:
import mpu

# raw corpus

corpus_raw = mpu.io.read('../data/corpus_raw.pickle')
corpus_raw_long = mpu.io.read('../data/corpus_raw_long.pickle')

x_raw = corpus_raw
x_raw_long = corpus_raw_long

# clean corpus

corpus = mpu.io.read('../data/corpus_clean.pickle')
corpus_long = mpu.io.read('../data/corpus_clean_long.pickle')

x = corpus
x_long = corpus_long

# pos, negs

pos = mpu.io.read('../data/pos.pickle')
neg = mpu.io.read('../data/neg.pickle')
y = [1] * len(pos) + [0] * len(neg)

In [4]:
weight_for_class_0 = len(x) / (len(neg) * 2) 
weight_for_class_1 = len(x) / (len(pos) * 2) 

# split data

In [5]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=42)
x_train_long, x_test_long, y_train_long, y_test_long = train_test_split(x_long, y, test_size=0.20, random_state=42)
x_train_r, x_test_r, y_train_r, y_test_r = train_test_split(x_raw, y, test_size=0.20, random_state=42)
x_train_r_long, x_test_r_long, y_train_r_long, y_test_r_long = train_test_split(x_raw_long, y, test_size=0.20, random_state=42)

In [None]:
from collections import Counter

counter = Counter()

for i in y_test:
    counter[i] +=1
    
print(counter)

Counter({0: 991, 1: 13})


# instantiate feature extractors, embedding models, resamplers, models 

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer # uses one-dim array of strings ~ shape (n,)
from sklearn.feature_extraction.text import CountVectorizer # returns arrays

vect_cv = CountVectorizer()
vect_tfidf = TfidfVectorizer()

model_mpnet = 'paraphrase-multilingual-mpnet-base-v2'
model_distill = 'distiluse-base-multilingual-cased-v1'

from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from imblearn.over_sampling import ADASYN 

rus = RandomUnderSampler(random_state=42, sampling_strategy=1)
ros = RandomOverSampler(random_state=42, sampling_strategy='not majority')
ada = ADASYN(random_state=42)

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

logreg = LogisticRegression(solver='liblinear', random_state=42)
logreg_weight = LogisticRegression(solver='liblinear', class_weight={0: weight_for_class_0, 1: weight_for_class_1}, random_state=42)
svm = SVC(kernel='linear')
svm_weight = SVC(kernel='linear', class_weight={0: weight_for_class_0, 1: weight_for_class_1}, probability=True)
mlp = MLPClassifier(activation='logistic', batch_size=16, hidden_layer_sizes=(), learning_rate='constant',learning_rate_init=0.001, solver='adam', random_state=42)

# function to train eval models

In [8]:
from imblearn.pipeline import Pipeline
from imblearn.pipeline import make_pipeline 

from embetter.text import SentenceEncoder

from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import StratifiedKFold

from sklearn.metrics import f1_score, precision_score, recall_score

import pandas as pd

In [None]:
# TF TFIDF baseline and weighted

def train_eval_tf_tfidf(x_train, y_train, x_test, y_test, text_length, kfold):
    
    def run_estimator(estimator, feature_extractor, balanced):
        
        pipeline = Pipeline([("vectorizer", feature_extractor),
                             ("estimator", estimator)
                            ])
    
        y_train_pred_cv = cross_val_predict(pipeline, x_train, y_train, cv=StratifiedKFold(kfold), method='predict') # cross val splits the data and then applies the pipeline steps

        pipeline.fit(x_train, y_train)
        
        y_train_pred = pipeline.predict(x_train)

        y_test_pred = pipeline.predict(x_test)

        return {
            'Classifier': pipeline['estimator'],
            'Feature_extraction': pipeline['vectorizer'],
            'Weighting': 'Weighted' if balanced else 'None',
            'CV': kfold,
            'Text_length': text_length,
            'F1_tr_cv': round(f1_score(y_train, y_train_pred_cv), 3),
            'F1_tr': round(f1_score(y_train, y_train_pred), 3),
            'F1_ts': round(f1_score(y_test, y_test_pred), 3),
            'Precision_tr_cv': round(precision_score(y_train, y_train_pred_cv), 3),
            'Precision_tr': round(precision_score(y_train, y_train_pred), 3),
            'Precision_ts': round(precision_score(y_test, y_test_pred), 3),
            'Recall_tr_cv': round(recall_score(y_train, y_train_pred_cv), 3),
            'Recall_tr': round(recall_score(y_train, y_train_pred), 3),
            'Recall_ts': round(recall_score(y_test, y_test_pred), 3)
        }


    all_scores = []
    
    for feature_extractor in [vect_cv, vect_tfidf]:
        
        for estimator in [logreg, svm]:
            all_scores.append(run_estimator(estimator, feature_extractor, False))
            
        for estimator in [logreg_weight, svm_weight]: 
            all_scores.append(run_estimator(estimator, feature_extractor, True))

    return all_scores

In [10]:
# embeddings baseline and weighted

def train_eval_embeddings(x_train, y_train, x_test, y_test, text_length, kfold):
    
    def run_estimator(estimator, balanced):
        
        pipeline = make_pipeline(
            SentenceEncoder(embedding_model),
            estimator
        )
    
        y_train_pred_cv = cross_val_predict(pipeline, x_train, y_train, cv=StratifiedKFold(kfold), method='predict')

        pipeline.fit(x_train, y_train)
        
        y_train_pred = pipeline.predict(x_train)

        y_test_pred = pipeline.predict(x_test)

        return {
            'Classifier': estimator,
            'Feature_extraction': embedding_model,
            'Weighting': 'Weighted' if balanced else None,
            'CV': kfold,
            'Text_length': text_length,
            'F1_tr_cv': round(f1_score(y_train, y_train_pred_cv), 3),
            'F1_tr': round(f1_score(y_train, y_train_pred), 3),
            'F1_ts': round(f1_score(y_test, y_test_pred), 3),
            'Precision_tr_cv': round(precision_score(y_train, y_train_pred_cv), 3),
            'Precision_tr': round(precision_score(y_train, y_train_pred), 3),
            'Precision_ts': round(precision_score(y_test, y_test_pred), 3),
            'Recall_tr_cv': round(recall_score(y_train, y_train_pred_cv), 3),
            'Recall_tr': round(recall_score(y_train, y_train_pred), 3),
            'Recall_ts': round(recall_score(y_test, y_test_pred), 3)
        }


    all_scores = []
    
    for embedding_model in [model_mpnet, model_distill]:
        
            for estimator in [logreg, svm, mlp]:
                all_scores.append(run_estimator(estimator, False))
                
            for estimator in [logreg_weight, svm_weight]: 
                all_scores.append(run_estimator(estimator, True))

    return all_scores

In [18]:
# TF TFIDF resampled

def train_eval_tf_tfidf_resampled(x_train, y_train, x_test, y_test, text_length, kfold):
    
    all_scores = []
    
    for feature_extractor in [vect_cv, vect_tfidf]:
        
        for resampler in [rus, ros, ada]:
        
            for estimator in [logreg, svm, mlp]:
        
                pipeline = Pipeline([("vectorizer", feature_extractor),
                                    ("resampler", resampler),
                                    ("estimator", estimator)
                                    ])
        
                y_train_pred_cv = cross_val_predict(pipeline, x_train, y_train, cv=StratifiedKFold(kfold), method='predict')

                pipeline.fit(x_train, y_train)
                
                y_train_pred = pipeline.predict(x_train)

                y_test_pred = pipeline.predict(x_test)

                scores = {
                    'Classifier': pipeline['estimator'],
                    'Feature_extraction': pipeline['vectorizer'],
                    'Weighting': pipeline['resampler'],
                    'CV': kfold,
                    'Text_length': text_length,
                    'F1_tr_cv': round(f1_score(y_train, y_train_pred_cv), 3),
                    'F1_tr': round(f1_score(y_train, y_train_pred), 3),
                    'F1_ts': round(f1_score(y_test, y_test_pred), 3),
                    'Precision_tr_cv': round(precision_score(y_train, y_train_pred_cv), 3),
                    'Precision_tr': round(precision_score(y_train, y_train_pred), 3),
                    'Precision_ts': round(precision_score(y_test, y_test_pred), 3),
                    'Recall_tr_cv': round(recall_score(y_train, y_train_pred_cv), 3),
                    'Recall_tr': round(recall_score(y_train, y_train_pred), 3),
                    'Recall_ts': round(recall_score(y_test, y_test_pred), 3)    
                }
    
                all_scores.append(scores)

    return all_scores

In [12]:
# embeddings resampled

def train_eval_embedding_resampled(x_train, y_train, x_test, y_test, text_length, kfold, embed_model):
    
    all_scores = []
    
    pipeline = make_pipeline(
        SentenceEncoder(embed_model),
        RandomOverSampler(random_state=42, sampling_strategy='not majority'),
        MLPClassifier(activation='logistic', batch_size=16, hidden_layer_sizes=(), learning_rate='constant',learning_rate_init=0.001, solver='adam', random_state=42)
    )
                                
    y_train_pred_cv = cross_val_predict(pipeline, x_train, y_train, cv=StratifiedKFold(kfold), method='predict')

    pipeline.fit(x_train, y_train)
    
    y_train_pred = pipeline.predict(x_train)

    y_test_pred = pipeline.predict(x_test)

    scores = {
        'Classifier': mlp,
        'Feature_extraction': embed_model,
        'Weighting': ros,
        'CV': kfold,
        'Text_length': text_length,
        'F1_tr_cv': round(f1_score(y_train, y_train_pred_cv), 3),
        'F1_tr': round(f1_score(y_train, y_train_pred), 3),
        'F1_ts': round(f1_score(y_test, y_test_pred), 3),
        'Precision_tr_cv': round(precision_score(y_train, y_train_pred_cv), 3),
        'Precision_tr': round(precision_score(y_train, y_train_pred), 3),
        'Precision_ts': round(precision_score(y_test, y_test_pred), 3),
        'Recall_tr_cv': round(recall_score(y_train, y_train_pred_cv), 3),
        'Recall_tr': round(recall_score(y_train, y_train_pred), 3),
        'Recall_ts': round(recall_score(y_test, y_test_pred), 3)    
    }

    all_scores.append(scores)

    return all_scores

# train eval models

In [13]:
import pandas as pd

In [14]:
# TF TFIDF baseline and weighted

df1 = pd.DataFrame(train_eval_tf_tfidf(x_train, y_train, x_test, y_test,'Title_Abstract',2))
df2 = pd.DataFrame(train_eval_tf_tfidf(x_train_long, y_train_long, x_test_long, y_test_long,'Title_Abstract_Main',2))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [15]:
# embeddings baseline and weighted

df3 = pd.DataFrame(train_eval_embeddings(x_train, y_train, x_test, y_test,'Title_Abstract',2))
df4 = pd.DataFrame(train_eval_embeddings(x_train_long, y_train_long, x_test_long, y_test_long,'Title_Abstract_Main',2))

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# TF TFIDF resampled

df5 = pd.DataFrame(train_eval_tf_tfidf_resampled(x_train, y_train, x_test, y_test,'Title_Abstract',2))
df6 = pd.DataFrame(train_eval_tf_tfidf_resampled(x_train, y_train, x_test, y_test,'Title_Abstract_Main',2))



In [None]:
# embeddings resampled

df7 = pd.DataFrame(train_eval_embedding_resampled(x_train, y_train, x_test, y_test,'Title_Abstract',2, model_mpnet))
df8 = pd.DataFrame(train_eval_embedding_resampled(x_train, y_train, x_test, y_test,'Title_Abstract_Main',2, model_mpnet))

df9 = pd.DataFrame(train_eval_embedding_resampled(x_train, y_train, x_test, y_test,'Title_Abstract',2, model_distill))
df10 = pd.DataFrame(train_eval_embedding_resampled(x_train, y_train, x_test, y_test,'Title_Abstract',2, model_distill))

# cocatenate model results

In [35]:
res = pd.concat([df1,df2,df3,df4,df5,df6,df7,df8,df9,df10])
res = res.sort_values(by='Recall_tr_cv', ascending=False).reset_index(drop=True)
res


Unnamed: 0,Classifier,Feature_extraction,Weighting,CV,Text_length,F1_tr_cv,F1_tr,F1_ts,Precision_tr_cv,Precision_tr,Precision_ts,Recall_tr_cv,Recall_tr,Recall_ts
0,"MLPClassifier(activation='logistic', batch_siz...",TfidfVectorizer(),"RandomUnderSampler(random_state=42, sampling_s...",2,Title_Abstract,0.056,0.075,0.059,0.029,0.039,0.031,0.918,1.000,0.769
1,"MLPClassifier(activation='logistic', batch_siz...",TfidfVectorizer(),"RandomUnderSampler(random_state=42, sampling_s...",2,Title_Abstract_Main,0.056,0.075,0.059,0.029,0.039,0.031,0.918,1.000,0.769
2,"MLPClassifier(activation='logistic', batch_siz...",CountVectorizer(),"RandomUnderSampler(random_state=42, sampling_s...",2,Title_Abstract,0.065,0.091,0.072,0.034,0.048,0.038,0.878,1.000,0.769
3,"MLPClassifier(activation='logistic', batch_siz...",CountVectorizer(),"RandomUnderSampler(random_state=42, sampling_s...",2,Title_Abstract_Main,0.065,0.091,0.072,0.034,0.048,0.038,0.878,1.000,0.769
4,"LogisticRegression(random_state=42, solver='li...",TfidfVectorizer(),"RandomUnderSampler(random_state=42, sampling_s...",2,Title_Abstract,0.075,0.112,0.076,0.039,0.059,0.040,0.857,1.000,0.692
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,"MLPClassifier(activation='logistic', batch_siz...",distiluse-base-multilingual-cased-v1,,2,Title_Abstract_Main,0.000,0.367,0.267,0.000,1.000,1.000,0.000,0.224,0.154
72,SVC(kernel='linear'),distiluse-base-multilingual-cased-v1,,2,Title_Abstract_Main,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
73,"LogisticRegression(random_state=42, solver='li...",distiluse-base-multilingual-cased-v1,,2,Title_Abstract_Main,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000
74,"LogisticRegression(random_state=42, solver='li...",TfidfVectorizer(),,2,Title_Abstract,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000,0.000


In [37]:
res.to_csv('../results/preliminary/model_results.csv')

# best performing model

In [38]:
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix

# predict probabilities and calculate model loss
# log_likelihood = y_test*np.log(y_pred) + (1-y_test)*np.log(1-y_pred)

# (tn, fp, fn, tp)
#true negatives, false positives
#false negatives, true positives

In [None]:
def train_eval_best_model(x_train, y_train, x_test, y_test, text_length, kfold, embedding_model, random_state):
    
    pipeline = make_pipeline(
        SentenceEncoder(embedding_model),
        LogisticRegression(solver='liblinear', class_weight={0: weight_for_class_0, 1: weight_for_class_1}, random_state=random_state)
    )

    y_train_pred_cv = cross_val_predict(pipeline, x_train, y_train, cv=StratifiedKFold(kfold), method='predict') 

    pipeline.fit(x_train, y_train)
    
    y_train_pred = pipeline.predict(x_train)

    y_test_pred = pipeline.predict(x_test)

    scores = {
        'Classifier': 'Log_reg',
        'Feature_extraction': embedding_model,
        'Weighting': 'Weighted',
        'CV': kfold,
        'Text_length': text_length,
        'Solver': 'Liblinear',
        'F1_tr_cv': round(f1_score(y_train, y_train_pred_cv), 3),
        'F1_tr': round(f1_score(y_train, y_train_pred), 3),
        'F1_ts': round(f1_score(y_test, y_test_pred), 3),
        'Precision_tr_cv': round(precision_score(y_train, y_train_pred_cv), 3),
        'Precision_tr': round(precision_score(y_train, y_train_pred), 3),
        'Precision_ts': round(precision_score(y_test, y_test_pred), 3),
        'Recall_tr_cv': round(recall_score(y_train, y_train_pred_cv), 3),
        'Recall_tr': round(recall_score(y_train, y_train_pred), 3),
        'Recall_ts': round(recall_score(y_test, y_test_pred), 3)
        }
    
    # train set
    y_pred_tr = pipeline.predict_proba(x_train) # predict_proba returns probabilities of a classification label
    logloss_tr = log_loss(y_train, y_pred_tr) 

    # test set
    y_pred_ts = pipeline.predict_proba(x_test)  
    logloss_ts = log_loss(y_test, y_pred_ts)

    print(f'loss training set: {logloss_tr}, \nloss test set {logloss_ts}')
    print(f'confusion matrix train set:\n{confusion_matrix(y_train, pipeline.predict(x_train))}')
    print(f'confusion matrix test set:\n{confusion_matrix(y_test, pipeline.predict(x_test))}') 
 
    return scores, y_pred_ts

In [None]:
mpnet_seed_42 = train_eval_best_model(x_train, y_train, x_test, y_test, 'Title_Abstract', 2, model_mpnet, 42)
mpnet_seed_42

loss training set: 0.12094192813919187, 
loss test set 0.14101942622176325
confusion matrix train set:
[[3787  179]
 [   0   49]]
confusion matrix test set:
[[938  53]
 [  4   9]]


({'Classifier': 'Log_reg',
  'Feature_extraction': 'paraphrase-multilingual-mpnet-base-v2',
  'Weighting': 'Weighted',
  'CV': 2,
  'Text_length': 'Title_Abstract',
  'Solver': 'Liblinear',
  'F1_tr_cv': 0.214,
  'F1_tr': 0.354,
  'F1_ts': 0.24,
  'Precision_tr_cv': 0.134,
  'Precision_tr': 0.215,
  'Precision_ts': 0.145,
  'Recall_tr_cv': 0.531,
  'Recall_tr': 1.0,
  'Recall_ts': 0.692},
 array([[0.66922965, 0.33077035],
        [0.9146951 , 0.0853049 ],
        [0.80018461, 0.19981539],
        ...,
        [0.998998  , 0.001002  ],
        [0.97490819, 0.02509181],
        [0.52840227, 0.47159773]]))

In [44]:
distill_seed_42 = train_eval_best_model(x_train, y_train, x_test, y_test, 'Title_Abstract', 2, model_distill, 42)
distill_seed_42 

loss training set: 0.18816402398979437, 
loss test set 0.20433647693199175
confusion matrix train set:
[[3708  258]
 [   0   49]]
confusion matrix test set:
[[919  72]
 [  3  10]]


({'Classifier': 'Log_reg',
  'Feature_extraction': 'distiluse-base-multilingual-cased-v1',
  'Weighting': 'Weighted',
  'CV': 2,
  'Text_length': 'Title_Abstract',
  'Solver': 'Liblinear',
  'F1_tr_cv': 0.169,
  'F1_tr': 0.275,
  'F1_ts': 0.211,
  'Precision_tr_cv': 0.102,
  'Precision_tr': 0.16,
  'Precision_ts': 0.122,
  'Recall_tr_cv': 0.49,
  'Recall_tr': 1.0,
  'Recall_ts': 0.769},
 array([[0.46783879, 0.53216121],
        [0.89439193, 0.10560807],
        [0.75219147, 0.24780853],
        ...,
        [0.98856338, 0.01143662],
        [0.92281154, 0.07718846],
        [0.31531975, 0.68468025]]))

In [None]:
distill_seed_36= train_eval_best_model(x_train, y_train, x_test, y_test, 'Title_Abstract', 2 model_distill, 36)
distill_seed_36

In [None]:
mpnet_seed_36 = train_eval_best_model(x_train, y_train, x_test, y_test, 'Title_Abstract', 2, model_mpnet, 36)
mpnet_seed_36 

# investigating performance on different train-test partitions

In [None]:
# run on a few different train-test partitions, then report the average with the standard error.
# I should see greater performance with more data, but also lower variance across the different random samples

In [None]:
m_seed24 = train_eval_best_model(x_train, y_train, x_test, y_test, 'Title_Abstract', 2, model_distill, 24)
m_seed36 = train_eval_best_model(x_train, y_train, x_test, y_test, 'Title_Abstract', 2, model_distill, 36)
m_seed64 = train_eval_best_model(x_train, y_train, x_test, y_test, 'Title_Abstract', 2, model_distill, 64)
m_seed128 = train_eval_best_model(x_train, y_train, x_test, y_test, 'Title_Abstract', 2, model_distill, 128)

m_seeds = pd.concat([m_seed24, m_seed36, m_seed64, m_seed128])

# error analysis

In [None]:
# combine x df with df containing text to explore 
# missclassification of false positive instances 

In [46]:
# log reg

x_test_df = pd.DataFrame(x_test)

dat_pred_label = pd.DataFrame(distill_seed_42[1], columns=['neg_label', 'pos_label'])
dat_pred_label['y_true'] = y_test
dat_pred_label

# assign predicted labels to examples

def get_prediction_label(row):
    if row["y_true"] == 0:
        return 'fp' if row['neg_label'] < 0.5 else 'tn'
    else:
        return 'tp' if row['pos_label'] > 0.5 else 'fn'

dat_pred_label['prediction_label'] = dat_pred_label.apply(get_prediction_label, axis=1)

dat_pred_label = dat_pred_label.merge(x_test_df, left_index=True, right_index=True)
dat_pred_label.to_csv('../results/preliminary/error_analysis_predictions_with_tetx.csv')

fp_examples = dat_pred_label[dat_pred_label['prediction_label'] == 'fp']
fp_examples


Unnamed: 0,neg_label,pos_label,y_true,prediction_label,0
0,0.467839,0.532161,0,fp,tiempo rendimiento costo aserrado algarrobo bl...
10,0.452309,0.547691,0,fp,germinación supervivencia seis especie nativo ...
51,0.072003,0.927997,0,fp,supervivencia crecimiento especie distinto est...
56,0.466063,0.533937,0,fp,ciclo vida lombriz tierra apto vermicompostaje...
67,0.174027,0.825973,0,fp,sustentable aprovechamiento tierro hojo bosque...
...,...,...,...,...,...
928,0.269882,0.730118,0,fp,efecto gradient pastoreo ovino vegetación suel...
944,0.433470,0.566530,0,fp,eficiencia trampar pitlight led muestreo coleo...
963,0.185885,0.814115,0,fp,abordaje participativo conservación modelo int...
966,0.294325,0.705675,0,fp,relación zorro sechura pseudalopex sechurae th...
