In [2]:
from datasets import load_dataset

# 加载SST-2数据集的前20个测试样本
dataset = load_dataset("glue", "sst2", split="test[:20]")

print("数据集信息:")
print(f"类型: {type(dataset)}")
print(f"样本数: {len(dataset)}")
print(f"特征: {dataset.features}")

print("\n前3个样本:")
for i in range(3):
    print(f"样本 {i+1}:")
    print(f"  文本: {dataset[i]['sentence']}")
    print(f"  标签: {dataset[i]['label']}")
    print()

'(ProtocolError('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None)), '(Request ID: 694d524a-f6c1-4c83-843f-20cf0d9296e1)')' thrown while requesting HEAD https://huggingface.co/datasets/glue/resolve/main/README.md
Retrying in 1s [Retry 1/5].
'(ProtocolError('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None)), '(Request ID: 590ecdcd-6f21-4e3f-834c-b72adf9ac99a)')' thrown while requesting HEAD https://huggingface.co/datasets/glue/resolve/main/README.md
Retrying in 2s [Retry 2/5].
'(ProtocolError('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None)), '(Request ID: d1166312-8bcf-4dae-b4ae-95d4967cd891)')' thrown while requesting HEAD https://huggingface.co/datasets/glue/resolve/main/README.md
Retrying in 4s [Retry 3/5].
'(ProtocolError('Connection aborted.', ConnectionResetError(10054, '远程主机强迫关闭了一个现有的连接。', None, 10054, None)), '(Request ID: cd7a440b-b52e-46d4-be82-ca4fc878d8

数据集信息:
类型: <class 'datasets.arrow_dataset.Dataset'>
样本数: 20
特征: {'sentence': Value('string'), 'label': ClassLabel(names=['negative', 'positive']), 'idx': Value('int32')}

前3个样本:
样本 1:
  文本: uneasy mishmash of styles and genres .
  标签: -1

样本 2:
  文本: this film 's relationship to actual tension is the same as what christmas-tree flocking in a spray can is to actual snow : a poor -- if durable -- imitation .
  标签: -1

样本 3:
  文本: by the end of no such thing the audience , like beatrice , has a watchful affection for the monster .
  标签: -1



In [37]:
from datasets import load_dataset
import requests
import time
import re

# 加载数据集
dataset = load_dataset("glue", "sst2", split="test[:20]")

def query_ollama(prompt, model="llama2:7b"):
    """调用Ollama模型"""
    url = "http://localhost:11434/api/generate"
    data = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0.1,  # 降低随机性
            "num_predict": 50    # 限制输出长度
        }
    }
    
    try:
        response = requests.post(url, json=data, timeout=60)
        if response.status_code == 200:
            return response.json()["response"]
        else:
            return f"错误: {response.status_code}"
    except Exception as e:
        return f"请求失败: {str(e)}"

# 改进的提示词
improved_prompt = """判断以下电影评论的情感，只返回一个数字：
- 正面情感返回 1
- 负面情感返回 -1

评论："{0}"

情感："""

def robust_sentiment_parser(response):
    """鲁棒的情感解析器"""
    if not response or len(response.strip()) == 0:
        return None
    
    response_clean = response.strip()
    
    # 1. 直接匹配纯数字
    if response_clean == '1':
        return 1
    if response_clean == '-1':
        return -1
    
    # 2. 使用正则表达式提取数字
    numbers = re.findall(r'-?\d+', response_clean)
    for num in numbers:
        if num == '-1':
            return -1
        elif num == '1':
            return 1
    
    # 3. 关键词匹配（英文）
    response_lower = response_clean.lower()
    
    positive_keywords = [
        'positive', 'good', 'great', 'excellent', 'wonderful', 'amazing',
        'love', 'like', 'enjoy', 'brilliant', 'fantastic', 'awesome'
    ]
    
    negative_keywords = [
        'negative', 'bad', 'poor', 'terrible', 'awful', 'horrible',
        'hate', 'dislike', 'boring', 'disappointing', 'failure'
    ]
    
    pos_count = sum(1 for word in positive_keywords if word in response_lower)
    neg_count = sum(1 for word in negative_keywords if word in response_lower)
    
    if pos_count > neg_count:
        return 1
    elif neg_count > pos_count:
        return -1
    
    # 4. 检查是否包含情感表述
    if 'positive' in response_lower:
        return 1
    elif 'negative' in response_lower:
        return -1
    
    return None

# 主循环
results = []
print("开始情感分析...")

for i, item in enumerate(dataset):
    text = item['sentence']
    true_label = item['label']
    
    response = query_ollama(improved_prompt.format(text))
    predicted_sentiment = robust_sentiment_parser(response)
    true_sentiment = 1 if true_label == 1 else -1
    
    is_correct = predicted_sentiment == true_sentiment if predicted_sentiment is not None else False
    
    results.append({
        'text': text,
        'true_sentiment': true_sentiment,
        'predicted_sentiment': predicted_sentiment,
        'response': response,
        'correct': is_correct
    })
    
    # 只显示有问题的样本或每10个显示一次
    # if predicted_sentiment is None or not is_correct or i % 10 == 0:
    #     print(f"样本 {i+1}:")
    #     print(f"文本: {text}")
    #     print(f"回复: {response}")
    #     print(f"真实: {true_sentiment}, 预测: {predicted_sentiment}, 正确: {is_correct}")
    #     print("-" * 80)
    
    # time.sleep(1)

# 统计结果
valid_predictions = [r for r in results if r['predicted_sentiment'] is not None]
correct_predictions = sum(1 for r in valid_predictions if r['correct'])
total_valid = len(valid_predictions)

print(f"\n结果统计:")
print(f"总样本数: {len(results)}")
print(f"有效预测数: {total_valid}")
print(f"有效预测准确率: {correct_predictions/total_valid:.2%}" if total_valid > 0 else "无有效预测")
print(f"无效预测数: {len(results) - total_valid}")

开始情感分析...

结果统计:
总样本数: 20
有效预测数: 18
有效预测准确率: 22.22%
无效预测数: 2


In [32]:
from datasets import load_dataset
import requests
import time
import re

# 加载数据集
dataset = load_dataset("glue", "sst2", split="test[:20]")

def query_ollama(prompt, model="deepseek-coder:6.7b"):
    """调用Ollama模型"""
    url = "http://localhost:11434/api/generate"
    data = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0.1,  # 降低随机性
            "num_predict": 50    # 限制输出长度
        }
    }
    
    try:
        response = requests.post(url, json=data, timeout=60)
        if response.status_code == 200:
            return response.json()["response"]
        else:
            return f"错误: {response.status_code}"
    except Exception as e:
        return f"请求失败: {str(e)}"

# 改进的提示词
improved_prompt = """判断以下电影评论的情感，只返回一个数字：
- 正面情感返回 1
- 负面情感返回 -1

评论："{0}"

情感："""

def robust_sentiment_parser(response):
    """鲁棒的情感解析器"""
    if not response or len(response.strip()) == 0:
        return None
    
    response_clean = response.strip()
    
    # 1. 直接匹配纯数字
    if response_clean == '1':
        return 1
    if response_clean == '-1':
        return -1
    
    # 2. 使用正则表达式提取数字
    numbers = re.findall(r'-?\d+', response_clean)
    for num in numbers:
        if num == '-1':
            return -1
        elif num == '1':
            return 1
    
    # 3. 关键词匹配（英文）
    response_lower = response_clean.lower()
    
    positive_keywords = [
        'positive', 'good', 'great', 'excellent', 'wonderful', 'amazing',
        'love', 'like', 'enjoy', 'brilliant', 'fantastic', 'awesome'
    ]
    
    negative_keywords = [
        'negative', 'bad', 'poor', 'terrible', 'awful', 'horrible',
        'hate', 'dislike', 'boring', 'disappointing', 'failure'
    ]
    
    pos_count = sum(1 for word in positive_keywords if word in response_lower)
    neg_count = sum(1 for word in negative_keywords if word in response_lower)
    
    if pos_count > neg_count:
        return 1
    elif neg_count > pos_count:
        return -1
    
    # 4. 检查是否包含情感表述
    if 'positive' in response_lower:
        return 1
    elif 'negative' in response_lower:
        return -1
    
    return None

# 主循环
results = []
print("开始情感分析...")

for i, item in enumerate(dataset):
    text = item['sentence']
    true_label = item['label']
    
    response = query_ollama(improved_prompt.format(text))
    predicted_sentiment = robust_sentiment_parser(response)
    true_sentiment = 1 if true_label == 1 else -1
    
    is_correct = predicted_sentiment == true_sentiment if predicted_sentiment is not None else False
    
    results.append({
        'text': text,
        'true_sentiment': true_sentiment,
        'predicted_sentiment': predicted_sentiment,
        'response': response,
        'correct': is_correct
    })
    
    # 只显示有问题的样本或每10个显示一次
    # if predicted_sentiment is None or not is_correct or i % 10 == 0:
    #     print(f"样本 {i+1}:")
    #     print(f"文本: {text}")
    #     print(f"回复: {response}")
    #     print(f"真实: {true_sentiment}, 预测: {predicted_sentiment}, 正确: {is_correct}")
    #     print("-" * 80)
    
    # time.sleep(1)

# 统计结果
valid_predictions = [r for r in results if r['predicted_sentiment'] is not None]
correct_predictions = sum(1 for r in valid_predictions if r['correct'])
total_valid = len(valid_predictions)

#print(f"\n结果统计:")
#print(f"总样本数: {len(results)}")
print(f"有效预测数: {total_valid}")
print(f"有效预测准确率: {correct_predictions/total_valid:.2%}" if total_valid > 0 else "无有效预测")
print(f"无效预测数: {len(results) - total_valid}")

开始情感分析...


KeyboardInterrupt: 

In [35]:
from datasets import load_dataset
import requests
import time
import re

# 加载数据集
dataset = load_dataset("glue", "sst2", split="test[:20]")

def query_ollama(prompt, model="gemma:2b-instruct"):
    """调用Ollama模型"""
    url = "http://localhost:11434/api/generate"
    data = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0.1,  # 降低随机性
            "num_predict": 50    # 限制输出长度
        }
    }
    
    try:
        response = requests.post(url, json=data, timeout=60)
        if response.status_code == 200:
            return response.json()["response"]
        else:
            return f"错误: {response.status_code}"
    except Exception as e:
        return f"请求失败: {str(e)}"

# 改进的提示词
improved_prompt = """判断以下电影评论的情感，只返回一个数字：
- 正面情感返回 1
- 负面情感返回 -1

评论："{0}"

情感："""

def robust_sentiment_parser(response):
    """鲁棒的情感解析器"""
    if not response or len(response.strip()) == 0:
        return None
    
    response_clean = response.strip()
    
    # 1. 直接匹配纯数字
    if response_clean == '1':
        return 1
    if response_clean == '-1':
        return -1
    
    # 2. 使用正则表达式提取数字
    numbers = re.findall(r'-?\d+', response_clean)
    for num in numbers:
        if num == '-1':
            return -1
        elif num == '1':
            return 1
    
    # 3. 关键词匹配（英文）
    response_lower = response_clean.lower()
    
    positive_keywords = [
        'positive', 'good', 'great', 'excellent', 'wonderful', 'amazing',
        'love', 'like', 'enjoy', 'brilliant', 'fantastic', 'awesome'
    ]
    
    negative_keywords = [
        'negative', 'bad', 'poor', 'terrible', 'awful', 'horrible',
        'hate', 'dislike', 'boring', 'disappointing', 'failure'
    ]
    
    pos_count = sum(1 for word in positive_keywords if word in response_lower)
    neg_count = sum(1 for word in negative_keywords if word in response_lower)
    
    if pos_count > neg_count:
        return 1
    elif neg_count > pos_count:
        return -1
    
    # 4. 检查是否包含情感表述
    if 'positive' in response_lower:
        return 1
    elif 'negative' in response_lower:
        return -1
    
    return None

# 主循环
results = []
print("开始情感分析...")

for i, item in enumerate(dataset):
    text = item['sentence']
    true_label = item['label']
    
    response = query_ollama(improved_prompt.format(text))
    predicted_sentiment = robust_sentiment_parser(response)
    true_sentiment = 1 if true_label == 1 else -1
    
    is_correct = predicted_sentiment == true_sentiment if predicted_sentiment is not None else False
    
    results.append({
        'text': text,
        'true_sentiment': true_sentiment,
        'predicted_sentiment': predicted_sentiment,
        'response': response,
        'correct': is_correct
    })
    
    # 只显示有问题的样本或每10个显示一次
    if predicted_sentiment is None or not is_correct or i % 10 == 0:
        print(f"样本 {i+1}:")
        print(f"文本: {text}")
        print(f"回复: {response}")
        print(f"真实: {true_sentiment}, 预测: {predicted_sentiment}, 正确: {is_correct}")
        print("-" * 80)
    
    time.sleep(1)

# 统计结果
valid_predictions = [r for r in results if r['predicted_sentiment'] is not None]
correct_predictions = sum(1 for r in valid_predictions if r['correct'])
total_valid = len(valid_predictions)

print(f"\n结果统计:")
print(f"总样本数: {len(results)}")
print(f"有效预测数: {total_valid}")
print(f"有效预测准确率: {correct_predictions/total_valid:.2%}" if total_valid > 0 else "无有效预测")
print(f"无效预测数: {len(results) - total_valid}")

开始情感分析...
样本 1:
文本: uneasy mishmash of styles and genres .
回复: -1
真实: -1, 预测: -1, 正确: True
--------------------------------------------------------------------------------
样本 3:
文本: by the end of no such thing the audience , like beatrice , has a watchful affection for the monster .
回复: 1
真实: -1, 预测: 1, 正确: False
--------------------------------------------------------------------------------
样本 4:
文本: director rob marshall went out gunning to make a great one .
回复: 1
真实: -1, 预测: 1, 正确: False
--------------------------------------------------------------------------------
样本 5:
文本: lathan and diggs have considerable personal charm , and their screen rapport makes the old story seem new .
回复: 1
真实: -1, 预测: 1, 正确: False
--------------------------------------------------------------------------------
样本 6:
文本: a well-made and often lovely depiction of the mysteries of friendship .
回复: 1
真实: -1, 预测: 1, 正确: False
------------------------------------------------------------------------------

In [23]:
from datasets import load_dataset
import requests
import time
import re

# 加载数据集
dataset = load_dataset("glue", "sst2", split="test[:20]")

def query_ollama(prompt, model="phi:2.7b"):
    """调用Ollama模型"""
    url = "http://localhost:11434/api/generate"
    data = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0.1,  # 降低随机性
            "num_predict": 50    # 限制输出长度
        }
    }
    
    try:
        response = requests.post(url, json=data, timeout=60)
        if response.status_code == 200:
            return response.json()["response"]
        else:
            return f"错误: {response.status_code}"
    except Exception as e:
        return f"请求失败: {str(e)}"

# 改进的提示词
improved_prompt = """判断以下电影评论的情感，只返回一个数字：
- 正面情感返回 1
- 负面情感返回 -1

评论："{0}"

情感："""

def robust_sentiment_parser(response):
    """鲁棒的情感解析器"""
    if not response or len(response.strip()) == 0:
        return None
    
    response_clean = response.strip()
    
    # 1. 直接匹配纯数字
    if response_clean == '1':
        return 1
    if response_clean == '-1':
        return -1
    
    # 2. 使用正则表达式提取数字
    numbers = re.findall(r'-?\d+', response_clean)
    for num in numbers:
        if num == '-1':
            return -1
        elif num == '1':
            return 1
    
    # 3. 关键词匹配（英文）
    response_lower = response_clean.lower()
    
    positive_keywords = [
        'positive', 'good', 'great', 'excellent', 'wonderful', 'amazing',
        'love', 'like', 'enjoy', 'brilliant', 'fantastic', 'awesome'
    ]
    
    negative_keywords = [
        'negative', 'bad', 'poor', 'terrible', 'awful', 'horrible',
        'hate', 'dislike', 'boring', 'disappointing', 'failure'
    ]
    
    pos_count = sum(1 for word in positive_keywords if word in response_lower)
    neg_count = sum(1 for word in negative_keywords if word in response_lower)
    
    if pos_count > neg_count:
        return 1
    elif neg_count > pos_count:
        return -1
    
    # 4. 检查是否包含情感表述
    if 'positive' in response_lower:
        return 1
    elif 'negative' in response_lower:
        return -1
    
    return None

# 主循环
results = []
print("开始情感分析...")

for i, item in enumerate(dataset):
    text = item['sentence']
    true_label = item['label']
    
    response = query_ollama(improved_prompt.format(text))
    predicted_sentiment = robust_sentiment_parser(response)
    true_sentiment = 1 if true_label == 1 else -1
    
    is_correct = predicted_sentiment == true_sentiment if predicted_sentiment is not None else False
    
    results.append({
        'text': text,
        'true_sentiment': true_sentiment,
        'predicted_sentiment': predicted_sentiment,
        'response': response,
        'correct': is_correct
    })
    
    # 只显示有问题的样本或每10个显示一次
    # if predicted_sentiment is None or not is_correct or i % 10 == 0:
    #     print(f"样本 {i+1}:")
    #     print(f"文本: {text}")
    #     print(f"回复: {response}")
    #     print(f"真实: {true_sentiment}, 预测: {predicted_sentiment}, 正确: {is_correct}")
    #     print("-" * 80)
    
    # time.sleep(1)

# 统计结果
valid_predictions = [r for r in results if r['predicted_sentiment'] is not None]
correct_predictions = sum(1 for r in valid_predictions if r['correct'])
total_valid = len(valid_predictions)

print(f"\n结果统计:")
print(f"总样本数: {len(results)}")
print(f"有效预测数: {total_valid}")
print(f"有效预测准确率: {correct_predictions/total_valid:.2%}" if total_valid > 0 else "无有效预测")
print(f"无效预测数: {len(results) - total_valid}")

开始情感分析...

结果统计:
总样本数: 20
有效预测数: 19
有效预测准确率: 26.32%
无效预测数: 1


In [22]:
import os
# 设置使用CPU
os.environ['OLLAMA_CPU'] = '1'
from datasets import load_dataset
import requests
import time
import re

# 加载数据集
dataset = load_dataset("glue", "sst2", split="test[:20]")

def query_ollama(prompt, model="mistral"):
    """调用Ollama模型"""
    url = "http://localhost:11434/api/generate"
    data = {
        "model": model,
        "prompt": prompt,
        "stream": False,
        "options": {
            "temperature": 0.1,  # 降低随机性
            "num_predict": 50    # 限制输出长度
        }
    }
    
    try:
        response = requests.post(url, json=data, timeout=120)  # 增加超时时间
        if response.status_code == 200:
            return response.json()["response"]
        else:
            print(f"API错误: {response.status_code} - {response.text}")
            return f"错误: {response.status_code}"
    except requests.exceptions.Timeout:
        return "错误: 请求超时"
    except requests.exceptions.ConnectionError:
        return "错误: 无法连接到Ollama服务，请确保Ollama正在运行"
    except Exception as e:
        return f"请求失败: {str(e)}"

# 改进的提示词
improved_prompt = """请分析以下电影评论的情感倾向，只返回一个数字：
- 如果是正面情感，返回 1
- 如果是负面情感，返回 -1

评论："{0}"

请只返回数字，不要有其他文字："""

def robust_sentiment_parser(response):
    """鲁棒的情感解析器"""
    if not response or len(response.strip()) == 0:
        return None
    
    response_clean = response.strip()
    
    # 1. 直接匹配纯数字
    if response_clean == '1':
        return 1
    if response_clean == '-1':
        return -1
    
    # 2. 使用正则表达式提取数字
    numbers = re.findall(r'-?\d+', response_clean)
    for num in numbers:
        if num == '-1':
            return -1
        elif num == '1':
            return 1
    
    # 3. 关键词匹配（英文）
    response_lower = response_clean.lower()
    
    positive_keywords = [
        'positive', 'good', 'great', 'excellent', 'wonderful', 'amazing',
        'love', 'like', 'enjoy', 'brilliant', 'fantastic', 'awesome', 'positive'
    ]
    
    negative_keywords = [
        'negative', 'bad', 'poor', 'terrible', 'awful', 'horrible',
        'hate', 'dislike', 'boring', 'disappointing', 'failure', 'negative'
    ]
    
    pos_count = sum(1 for word in positive_keywords if word in response_lower)
    neg_count = sum(1 for word in negative_keywords if word in response_lower)
    
    if pos_count > neg_count:
        return 1
    elif neg_count > pos_count:
        return -1
    
    # 4. 检查是否包含情感表述
    if 'positive' in response_lower:
        return 1
    elif 'negative' in response_lower:
        return -1
    
    return None

# 检查Ollama服务是否可用
def check_ollama_availability():
    """检查Ollama服务是否可用"""
    try:
        response = requests.get("http://localhost:11434/api/tags", timeout=10)
        return response.status_code == 200
    except:
        return False

# 主循环
print("开始情感分析...")

# 检查服务
if not check_ollama_availability():
    print("错误: 无法连接到Ollama服务，请确保Ollama正在运行")
    print("请在命令行中运行: ollama serve")
else:
    print("Ollama服务连接成功")
    
    results = []
    successful_requests = 0
    
    for i, item in enumerate(dataset):
        text = item['sentence']
        true_label = item['label']
        
        # print(f"处理样本 {i+1}/{len(dataset)}: {text[:50]}...")
        
        response = query_ollama(improved_prompt.format(text))
        predicted_sentiment = robust_sentiment_parser(response)
        true_sentiment = 1 if true_label == 1 else -1
        
        is_correct = predicted_sentiment == true_sentiment if predicted_sentiment is not None else False
        
        results.append({
            'text': text,
            'true_sentiment': true_sentiment,
            'predicted_sentiment': predicted_sentiment,
            'response': response,
            'correct': is_correct
        })
        
        # 显示详细结果
        # print(f"样本 {i+1}:")
        # print(f"文本: {text}")
        # print(f"回复: {response}")
        # print(f"真实: {true_sentiment}, 预测: {predicted_sentiment}, 正确: {is_correct}")
        # print("-" * 80)
        
        # 统计成功请求
        if not response.startswith("错误"):
            successful_requests += 1
        
        # 增加延迟以避免过载
        time.sleep(2)

    # 统计结果
    valid_predictions = [r for r in results if r['predicted_sentiment'] is not None]
    correct_predictions = sum(1 for r in valid_predictions if r['correct'])
    total_valid = len(valid_predictions)
    
    print(f"\n结果统计:")
    print(f"总样本数: {len(results)}")
    print(f"成功请求数: {successful_requests}")
    print(f"有效预测数: {total_valid}")
    if total_valid > 0:
        print(f"有效预测准确率: {correct_predictions/total_valid:.2%}")
    else:
        print("无有效预测")
    print(f"无效预测数: {len(results) - total_valid}")
    
    # 显示错误详情
    error_samples = [r for r in results if r['response'].startswith("错误")]
    if error_samples:
        print(f"\n错误样本 ({len(error_samples)}个):")
        for i, sample in enumerate(error_samples[:5]):  # 只显示前5个错误
            print(f"  {i+1}. {sample['text'][:50]}... -> {sample['response']}")

开始情感分析...
Ollama服务连接成功
处理样本 1/20: uneasy mishmash of styles and genres ....
处理样本 2/20: this film 's relationship to actual tension is the...
处理样本 3/20: by the end of no such thing the audience , like be...
处理样本 4/20: director rob marshall went out gunning to make a g...
处理样本 5/20: lathan and diggs have considerable personal charm ...
处理样本 6/20: a well-made and often lovely depiction of the myst...
处理样本 7/20: none of this violates the letter of behan 's book ...
处理样本 8/20: although it bangs a very cliched drum at times , t...
处理样本 9/20: it is not a mass-market entertainment but an uncom...
处理样本 10/20: this is junk food cinema at its greasiest ....
处理样本 11/20: it 's also heavy-handed and devotes too much time ...
处理样本 12/20: it helps that lil bow wow ... tones down his pint-...
处理样本 13/20: watching the film is like reading a times portrait...
处理样本 14/20: moore 's performance impresses almost as much as h...
处理样本 15/20: reinforces the talents of screenwriter charlie kau...
处理样本 16/20: now