# 手写基于情感字典的简易情感语义分析

### 准备工作：

加载情感字典

In [1]:
import re
# Lexicon lines look like "<word><whitespace><weight>"; split on any whitespace run.
pattern = re.compile(r'\s+')


def _load_weight_dict(path):
    """Read a ``word weight`` lexicon file into a ``{word: float(weight)}`` dict.

    Each line is stripped and split on whitespace. Only lines that yield
    exactly two fields are kept; every other line (blank lines, lines with
    extra columns, ...) is skipped. A later duplicate of a word overwrites
    the earlier entry.

    Raises:
        OSError: if the file cannot be opened.
        ValueError: if the second field of a kept line is not a valid float.
    """
    weights = {}
    with open(path, 'r', encoding='utf-8') as f:
        for line in f:
            result = pattern.split(line.strip())
            if len(result) == 2:
                weights[result[0]] = float(result[1])
    return weights


# One lexicon per role: positive words, negative words, degree adverbs, negation words.
pos_sentiment_dict = _load_weight_dict('./user_dict/positive_dict.txt')
neg_sentiment_dict = _load_weight_dict('./user_dict/negative_dict.txt')
adverb_dict = _load_weight_dict('./user_dict/adverb_dict.txt')
denial_dict = _load_weight_dict('./user_dict/denial_dict.txt')
        

In [2]:
import jieba.posseg as pseg

def pos_or_neg(num):
    """Map a sentiment score to its polarity label.

    Returns '正向情感' for scores strictly greater than zero and '负向情感'
    for everything else (zero included).
    """
    if num > 0:
        return '正向情感'
    return '负向情感'

# Sample clause used by the positive-sentiment example below.
test_sentence = '苹果好吃'
words = pseg.lcut(test_sentence) # returns a list of jieba.posseg pair objects (word/flag), e.g. 苹果/n

Building prefix dict from the default dictionary ...
Loading model from cache C:\Users\SYMBOL~1\AppData\Local\Temp\jieba.cache
Loading model cost 1.786 seconds.
Prefix dict has been built succesfully.


#### 正向分句情感分析：

In [3]:
# Print each token found in the positive lexicon with its POS flag and weight,
# followed by the polarity label derived from that weight.
for word, flag in words:
    if word not in pos_sentiment_dict:
        continue
    weight = pos_sentiment_dict[word]
    print(word, flag, weight)
    print(pos_or_neg(weight))

好吃 v 3.0
正向情感


#### 负向分句情感分析的例子：

In [4]:
test_sentence = '苹果不好'
words = pseg.lcut(test_sentence)
print(words)

# Weights in the negative lexicon are stored as positive numbers, so the sign
# is flipped before printing and labelling.
for word, flag in words:
    if word not in neg_sentiment_dict:
        continue
    signed_weight = neg_sentiment_dict[word] * -1
    print(word, flag, signed_weight)
    print(pos_or_neg(signed_weight))

[pair('苹果', 'n'), pair('不好', 'd')]
不好 d -2.0
负向情感


#### 正向情感加强：添加副词情况

In [5]:
# Sample clause with a modifier ("很") placed in front of the sentiment word.
test_sentence = '苹果很好吃'
words = pseg.lcut(test_sentence)
# Bare expression so the notebook displays the segmentation result.
words

[pair('苹果', 'n'), pair('很', 'zg'), pair('好吃', 'v')]

In [6]:
# Naive additive scoring: sum the weight of every positive word and every
# adverb that appears in the clause.
score = 0
for token in words:
    # token.word is the surface form; token.flag (unused here) is the POS tag.
    term = token.word
    if term in pos_sentiment_dict:
        score += pos_sentiment_dict[term]

    if term in adverb_dict:
        score += adverb_dict[term]
# For '苹果很好吃' this is 3 + 1.75.
print(score)

4.75


**上例采用累加的方式来计算整个语句的分数！**

这在“很”、“非常”一类修饰词中确实没问题，但是在“稍微”、“有点”等幅度较轻的修饰词中就存在问题了！
举个例子：
```
中心情感词：好吃 3.0
修饰词：很 1.75 有点 0.8
```
那么
```
很好吃 3+1.75=4.75
有点好吃 3+0.8=3.8
```
从数值计算角度上“很好吃”和“有点好吃”都是对“好吃”情感的加强，可是实际上“有点好吃”其实是对“好吃”的减弱，所以单纯的累加计算是不准确的。可以采用下述方式：
- 乘法计算
- 判断修饰词与1的关系，小于1时用减法

#### 以情感词为中心的前N个视窗分析：

In [7]:
emotion_dict = {}

def windows_n_analysis(word, flag, weight, words, position, window=2,
                       adverbs=None, denials=None):
    '''
    Score one sentiment word by looking at the tokens immediately before it.

    Starting from ``weight``, each of the up to ``window`` preceding tokens is
    checked against the adverb and negation lexicons:
      * an adverb multiplies the running score by its weight;
      * a negation word multiplies the running score by minus its weight.
    A token present in both lexicons applies both factors.

    word: the sentiment word
    flag: POS flag of the sentiment word (only echoed in the debug print)
    weight: lexicon weight of the sentiment word
    words: the tokenised clause; each item must expose a ``.word`` attribute
    position: index of the sentiment word inside ``words``
    window: how many preceding tokens to inspect (default 2, the original
        hard-coded look-back)
    adverbs: adverb lexicon ``{word: weight}``; defaults to the module-level
        ``adverb_dict``
    denials: negation lexicon ``{word: weight}``; defaults to the module-level
        ``denial_dict``

    Returns the analysis dict with keys ``key``, ``adverb``, ``denial``,
    ``value`` (the unmodified weight) and ``score`` (the modified weight).
    Each ``adverb`` / ``denial`` entry records the modifier, its distance
    from the sentiment word in ``position`` (1 = directly before) and its raw
    lexicon weight in ``value``. The same information is also printed, as
    before.
    '''
    # Resolve the lexicons lazily so callers can inject their own.
    if adverbs is None:
        adverbs = adverb_dict
    if denials is None:
        denials = denial_dict

    print(word, flag, words, position)
    current_weight = weight

    analysis_dict = {"key": word, "adverb": [], "denial": [], "value": weight, "score": 0}

    for distance in range(1, window + 1):
        view_window = position - distance
        if view_window < 0:
            # Ran off the start of the clause; farther offsets would too.
            break
        modifier = words[view_window].word

        if modifier in adverbs:
            factor = adverbs[modifier]
            current_weight *= factor
            analysis_dict["adverb"].append(
                {"key": modifier, "position": distance, "value": factor})

        if modifier in denials:
            factor = denials[modifier]
            # Negation flips the polarity of the running score.
            current_weight *= -factor
            analysis_dict["denial"].append(
                {"key": modifier, "position": distance, "value": factor})

    analysis_dict["score"] = current_weight
    print(analysis_dict)
    print(analysis_dict['score'])
    return analysis_dict
    
test_sentence = '要是米饭再多点儿就好了'
words = pseg.lcut(test_sentence)

# Run the window analysis on every token that appears in the positive lexicon.
for i, token in enumerate(words):
    if token.word in pos_sentiment_dict:
        windows_n_analysis(token.word, token.flag, pos_sentiment_dict[token.word], words, i)

好 a [pair('要是', 'c'), pair('米饭', 'n'), pair('再', 'd'), pair('多点儿', 'm'), pair('就', 'd'), pair('好', 'a'), pair('了', 'ul')] 5
{'key': '好', 'adverb': [], 'denial': [], 'value': 2.0, 'score': 2.0}
2.0


**PS：只能识别比较简单的分句，诸如“要是...就好了”等句型则需要更高级的句法字典**

到目前为止，我们看到的都是一句话句子的情感分析，或者说是一个子句的分析。那么一个完整的句子又如何分析呢？

原理很简单，将完整的语句按标点分成N个子句，计算每个子句的分数值，求和即可得出整个句子的情感分数值

如果句子中同时具有正向情感子句和负向情感子句，分别计算后相减即为最终的句子情感分数值。如“很好吃，但没用”

### 特别指出：基于情感字典的情感语义分析效果十分依赖情感字典的准确性、完整性及情感字典分数值的合理性！