# Quick Guide for Sentiment Analysis

[프로젝트 페이지: Research-on-the-TV-market](https://github.com/xikest/research-market-tv)

## Env setting: Install Colab Selenium & Chrome driver

In [12]:
import platform
# Detect the host operating system; packages are only installed when it is Linux
# (presumably the Colab runtime this notebook targets — confirm).
current_os = platform.system()
if current_os == "Linux":
    !pip install --upgrade pip
    
    # Install required Python packages
    !pip install -U  matplotlib numpy openpyxl tqdm pandas seaborn openai 
    !pip install -U sentigpt

# On Windows nothing is installed; only the OS name is printed.
elif current_os == "Windows":
    print("windows")
# Any other OS: print a Korean message meaning "Unsupported operating system."
else:
    print("지원하지 않는 운영체제입니다.")

## Env 세팅

In [46]:
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import seaborn as sns
from sentigpt import SentiGPT
from pathlib import Path
import pandas as pd

### 시각화 function

In [None]:
def plot_hist_each(df_analyzed_results, output_folder, file_name):
    """Draw, save and show one histogram (with KDE overlay) per DataFrame column.

    Every column gets its own 10x4 inch figure with unit-width bins on a fixed
    x-axis of -5..5. Each figure is written to
    ``<output_folder>/<file_name>_<column>_histogram.png`` at 300 dpi, shown,
    and then closed so figures do not pile up across columns and files.

    Args:
        df_analyzed_results: DataFrame whose columns are plotted one by one.
        output_folder: Destination directory (``str`` or ``pathlib.Path``);
            it is created if it does not exist yet.
        file_name: Prefix used for every saved image file.
    """
    from pathlib import Path

    # Accept plain strings as well as Path objects, and make sure savefig
    # does not fail on a missing directory.
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    sns.set_style("white")
    x_ticks = range(-5, 6)

    for column in df_analyzed_results.columns:
        fig, axes = plt.subplots(figsize=(10, 4))

        # seaborn lets binwidth override bins, so only binwidth is passed.
        sns.histplot(df_analyzed_results[column], kde=True, label=column, binwidth=1, ax=axes)

        # NOTE(review): histplot's default stat is "count", so the y-axis shows
        # counts although it is labelled "Density" — confirm the intended label.
        axes.set_ylabel("Density")
        axes.set_title(f"{column}")
        axes.set_xlim(-5, 5)
        axes.set_xlabel("")
        axes.set_xticks(x_ticks)
        # Keep y tick labels on whole numbers.
        axes.yaxis.set_major_locator(MaxNLocator(integer=True))
        sns.despine(ax=axes)
        fig.tight_layout()

        save_path = output_folder / f"{file_name}_{column}_histogram.png"
        fig.savefig(save_path, format='png', dpi=300)
        plt.show()
        # Release the figure explicitly; otherwise non-inline backends keep
        # every figure alive until the process ends.
        plt.close(fig)


def plot_hist_all(df_analyzed_results, output_folder, file_name):
    """Overlay the histograms (with KDE) of all columns in a single figure.

    Each column is drawn in its own "Set1" palette colour on shared axes with
    unit-width bins and a fixed x-axis of -5..5. The figure is written to
    ``<output_folder>/<file_name>_all_columns_histogram.png`` at 300 dpi,
    shown, and then closed.

    Args:
        df_analyzed_results: DataFrame whose columns are overlaid.
        output_folder: Destination directory (``str`` or ``pathlib.Path``);
            it is created if it does not exist yet.
        file_name: Prefix used for the saved image file.
    """
    from pathlib import Path

    # Accept plain strings as well as Path objects, and make sure savefig
    # does not fail on a missing directory.
    output_folder = Path(output_folder)
    output_folder.mkdir(parents=True, exist_ok=True)

    # set_theme is the preferred name of the sns.set alias; same behaviour.
    sns.set_theme(style="white")
    fig, axes = plt.subplots(figsize=(10, 4))

    # Build the palette once instead of once per column.
    columns = df_analyzed_results.columns
    palette = sns.color_palette("Set1", len(columns))

    # Draw one histogram per column on the shared axes.
    for column, color in zip(columns, palette):
        # seaborn lets binwidth override bins, so only binwidth is passed.
        sns.histplot(df_analyzed_results[column], kde=True, label=column, binwidth=1, ax=axes, color=color)

    # x-axis: fixed range with one tick per integer.
    axes.set_xlim(-5, 5)
    axes.set_xticks(range(-5, 6))

    # y-axis: whole-number ticks only.
    axes.yaxis.set_major_locator(MaxNLocator(integer=True))

    # Labels and title.
    # NOTE(review): histplot's default stat is "count", so the y-axis shows
    # counts although it is labelled "Density" — confirm the intended label.
    axes.set_ylabel("Density")
    axes.set_title("")
    axes.set_xlabel("")

    # Legend identifies the columns by name.
    axes.legend()
    sns.despine(ax=axes)

    # Save the figure.
    save_path = output_folder / f"{file_name}_all_columns_histogram.png"
    fig.savefig(save_path, format='png', dpi=300)

    # Show the figure, then release it so figures do not accumulate.
    plt.show()
    plt.close(fig)

### 분석 폴더
- `input_data` 폴더에 분석할 엑셀 파일(`.xlsx`, `.xls`)을 넣어주세요. 각 파일에는 `sentences` 열이 있어야 합니다.

In [15]:
# Folder that holds the files to analyze. The variable name (including its
# spelling) is kept because other cells refer to `intput_folder`.
intput_folder = Path("input_data")
# exist_ok=True makes the call idempotent without a separate exists() check,
# which would be racy and would hide a plain file sitting at this path.
intput_folder.mkdir(parents=True, exist_ok=True)

# Folder that receives the generated output files.
output_folder = Path('results')
output_folder.mkdir(parents=True, exist_ok=True)

### OpenAI API key

In [4]:
import os

# Read the key from the OPENAI_API_KEY environment variable so the secret does
# not have to be saved inside the notebook. Falls back to an empty string,
# which is the value that was previously hard-coded here.
API_KEY = os.environ.get("OPENAI_API_KEY", "")

## 준비

In [3]:
# Collect the Excel workbooks found directly inside the input folder.
excel_suffixes = {'.xlsx', '.xls'}
# Sorted so files are processed in a reproducible order.
file_list = sorted(intput_folder.glob('*'))
excel_files = [
    file
    for file in file_list
    if file.is_file()
    # Compare suffixes case-insensitively so e.g. "DATA.XLSX" is picked up too.
    and file.suffix.lower() in excel_suffixes
    # Skip the "~$name.xlsx" lock files Excel creates while a workbook is open;
    # they are not readable workbooks.
    and not file.name.startswith('~$')
]
for excel_file in excel_files:
    print(excel_file.name)

In [18]:
# Instantiate SentiGPT with the API key; its analyze_sentences() method is
# called once per input file in the run cell.
stm = SentiGPT(API_KEY)

## 실행

In [19]:
# Keywords scored for every file; loop-invariant, so defined once.
keywords_list = ["brightness", "color", "contrast", "reflection", "viewing angle"]

for excel_file in excel_files:
    # Load the data set.
    df_uploaded = pd.read_excel(excel_file)
    if "sentences" not in df_uploaded.columns:
        # One malformed workbook should not abort the whole batch.
        print(f"{excel_file.name}: no 'sentences' column, skipped")
        continue

    # GPT analysis. Empty cells are dropped so no NaN values are sent.
    sentences_list = df_uploaded["sentences"].dropna().tolist()
    analyzed_results_df = stm.analyze_sentences(sentences_list, keywords_list)

    # Save the analysis as "<stem>.csv" inside the results folder. `.stem`
    # keeps dotted names such as "a.b.xlsx" distinct ("a.b" instead of "a").
    file_name = excel_file.stem
    analyzed_results_df.to_csv(output_folder / f"{file_name}.csv", index=False, encoding='utf-8')

    # Build the plots. Subtracting 5 shifts the values onto the plots' fixed
    # -5..5 x-axis (presumably the scores are 0..10 — confirm against SentiGPT).
    df_analyzed_results = analyzed_results_df - 5
    print(f"{file_name}, Plot")
    plot_hist_each(df_analyzed_results, output_folder=output_folder, file_name=file_name)
    plot_hist_all(df_analyzed_results, output_folder=output_folder, file_name=file_name)