# QuickGuide for Rtings

[프로젝트 페이지: Research-on-the-TV-market](https://github.com/xikest/research-market-tv)

## Env setting: Install Selenium & Chrome driver on Colab

In [None]:
# Install or upgrade the third-party packages used by this notebook.
# Data handling and Excel I/O:
!pip install -U pandas openpyxl tqdm
# Web scraping:
!pip install -U requests selenium beautifulsoup4 
# Text analysis:
!pip install -U wordcloud nltk 
!pip install -U scikit-learn openai 
# Plotting:
!pip install -U matplotlib seaborn
# NOTE(review): presumably the distribution that provides the `market_research`
# package imported by the following cells - confirm.
!pip install -U getmodelspec

In [3]:
# Project helper that sets up Chrome and its driver for Selenium.
# NOTE(review): the webdriver/browser paths configured later in this notebook
# (/content/chromedriver/chromedriver, /content/chrome/chrome) presumably point
# at what this call installs - confirm against the helper.
from market_research.tools.installer import Installer
Installer.install_chrome_and_driver()

## Env. 세팅

In [1]:
from datetime import date
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import files
from market_research import Rtings
from market_research.tools import FileManager

### 분석 폴더 생성
- `input_urls`: 검색할 URL 목록(엑셀 파일)을 넣는 폴더
- `results`: 검색된 데이터 폴더

In [2]:
# Working folders, created relative to the current working directory:
#   input_urls/ - optional Excel workbooks listing the review URLs to scrape
#   results/    - Excel output written by the scraping cell
#
# mkdir(parents=True, exist_ok=True) replaces the exists()-then-mkdir check:
# it is safe to re-run, has no gap between the check and the creation, and
# raises FileExistsError if the path exists but is not a directory instead of
# silently continuing.
# NOTE: the variable name `intput_folder` (sic) is kept because later cells
# refer to it.
intput_folder = Path("input_urls")
intput_folder.mkdir(parents=True, exist_ok=True)

output_folder = Path("results")
output_folder.mkdir(parents=True, exist_ok=True)

## 준비

In [3]:
# Selenium settings passed to Rtings(...) in the scraping cell.
webdriver_path: str = "/content/chromedriver/chromedriver"  # chromedriver executable
browser_path: str = "/content/chrome/chrome"  # Chrome executable
# Forwarded as Rtings(enable_headless=...); presumably runs the browser without
# a visible window - confirm against the Rtings implementation.
enable_headless: bool = True

- 아래 `urls` 리스트를 비워 두면 `input_urls` 폴더의 엑셀 파일(`.xlsx`, `.xls`)에서 URL을 읽어옵니다. 엑셀 파일의 column 명은 `urls`로 하여 `input_urls` 폴더에 넣어주세요.

In [None]:
# Rtings review pages to scrape. When this list is left empty, the next cell
# fills it from the "urls" column of the Excel files in the input folder.
urls=["https://www.rtings.com/tv/reviews/sony/a95l-oled",
      "https://www.rtings.com/tv/reviews/lg/g3-oled",
      "https://www.rtings.com/tv/reviews/sony/a80l-a80cl-oled",
      "https://www.rtings.com/tv/reviews/lg/c3-oled",
      "https://www.rtings.com/tv/reviews/sony/x95l",
      "https://www.rtings.com/tv/reviews/sony/x93l-x93cl",
      "https://www.rtings.com/tv/reviews/sony/x90l-x90cl"]

In [5]:
# Fallback: when no URLs were listed by hand, collect them from the "urls"
# column of every Excel workbook found directly inside the input folder.
if not urls:
    # Sorted so the URL order is reproducible; glob() order is not guaranteed.
    excel_files = sorted(
        path
        for path in intput_folder.glob("*")
        if path.suffix.lower() in {".xlsx", ".xls"}
    )

    for excel_file in excel_files:
        df = pd.read_excel(excel_file)
        # Blank cells are read as NaN (a float); left in the list they would
        # break the scraping cell, which treats every entry as a URL string.
        url_column = df["urls"].dropna().astype(str).str.strip()
        urls.extend(url_column[url_column != ""])
print(urls)

## gathering data

In [None]:
# Scrape every review URL and accumulate three tables across all URLs:
#   score_df       - output of Rtings.get_score
#   measurement_df - output of Rtings.get_measurement_reuslts
#   comments_df    - output of Rtings.get_commetns
# The workbook is rewritten after each URL, so the results gathered so far
# survive if a later URL fails.
score_df = pd.DataFrame()
measurement_df = pd.DataFrame()
comments_df = pd.DataFrame()

# Output workbook path, named after the run date. Computed once so that every
# URL of a run is written to the same file.
file_name = f"rtings{date.today().strftime('%Y-%m-%d')}.xlsx"
output_file_name = output_folder / file_name

for url in urls:
    rtings = Rtings(webdriver_path=webdriver_path, browser_path=browser_path, enable_headless=enable_headless)

    # Scores: mode='w' starts the workbook afresh, the other sheets are appended.
    df = rtings.get_score(url, format_df=True)
    score_df = pd.concat([score_df, df], axis=0)
    FileManager.df_to_excel(score_df, file_name=output_file_name, sheet_name="scores", mode='w')

    # Measurements. The method name (sic) is the spelling used by the Rtings class.
    df = rtings.get_measurement_reuslts(url)
    measurement_df = pd.concat([measurement_df, df], axis=0)
    FileManager.df_to_excel(measurement_df, file_name=output_file_name, sheet_name="measurement", mode='a')

    # Comments. Fetch into `df` and append to the accumulator; assigning the
    # fetch result to `comments_df` itself would discard the comments of all
    # earlier URLs and then append the stale measurement table instead.
    df = rtings.get_commetns(url, format_df=True)
    comments_df = pd.concat([comments_df, df], axis=0)
    FileManager.df_to_excel(comments_df, file_name=output_file_name, sheet_name="comments", mode='a')

## plot

## heatmap, scores

In [None]:
# Build a (maker, product) x header table of scores for the heatmap.
# Assumes measurement_df carries "maker", "product", "header" and "score"
# columns with string values - confirm against the scraper output.
score_columns = ["maker", "product", "header", "score"]
data_df = (
    measurement_df[score_columns]
    .drop_duplicates()
    # Empty strings become NaN so that dropna() removes rows without a value.
    .replace("", np.nan)
    .dropna()
    .assign(
        score=lambda frame: frame["score"].map(float),
        # Shorten product names by removing the "-oled" text.
        product=lambda frame: frame["product"].map(lambda name: name.replace("-oled", "")),
    )
    .pivot(index=["maker", "product"], columns="header", values="score")
)

In [None]:
# Heatmap of the pivoted scores: one row per header, one column per product,
# with the colour scale fixed to the 0-10 range.
_, heatmap_ax = plt.subplots(figsize=(8, 10))
sns.heatmap(
    data_df.T,
    annot=True,
    cmap="cividis",
    cbar=True,
    vmin=0,
    vmax=10,
    ax=heatmap_ax,
)
heatmap_ax.set_title("heatmap for Rtings")
plt.show()

## Line Plot, HDR Brightness

In [None]:
# Build a table of HDR peak brightness: one row per "<maker>_<product>", one
# column per window size ("2%", "10%", ...), values in cd/m² as floats.

# .copy() makes the filtered rows an independent frame, so the column
# assignments below do not act on a slice of measurement_df (which raises
# SettingWithCopyWarning and may not take effect).
hdr_df = measurement_df[measurement_df["header"] == "HDR Brightness"].copy()

# Turn text such as "1,234 cd/m²" into the float 1234.0.
hdr_df["result_value"] = (
    hdr_df["result_value"]
    .str.replace("cd/m²", "", regex=False)
    .str.replace(",", "", regex=False)
    .str.strip()
    .astype(float)
)

# Keep only the rows whose label mentions "Peak".
hdr_df_peak = hdr_df[hdr_df["label"].str.contains("Peak", regex=False, na=False)].copy()

# Extract the integer in front of the "%" sign; assumes labels look like
# "Peak 10% Window" -> 10 (confirm against the scraped labels).
hdr_df_peak["label"] = hdr_df_peak["label"].map(lambda text: int(text.split("%")[0].split(" ")[-1]))

# pivot() returns its rows and columns sorted, so no explicit sort is needed.
data_df = hdr_df_peak.pivot(index=["maker", "product"], columns="label", values="result_value")

# A plain list gives the columns no name, so the transposed frame used by the
# plotting cells has an unnamed index.
data_df.columns = [f"{window}%" for window in data_df.columns]

# Collapse the (maker, product) index into a single "<maker>_<product>" label.
data_df.index = pd.Index([f"{maker}_{product}" for maker, product in data_df.index], name="label")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Static line chart of the HDR brightness table: one line per product, with
# the window sizes along the x axis.
sns.set_style("white")

_, brightness_ax = plt.subplots(figsize=(10, 5))
sns.lineplot(
    data=data_df.T,
    dashes=False,
    markers=True,
    palette='YlGnBu',
    ax=brightness_ax,
)

# Fixed reference ticks on the brightness axis.
brightness_ticks = [200, 1000, 1500, 2000]
brightness_ax.set_yticks(brightness_ticks)
brightness_ax.set_yticklabels(['200', '1000', '1500', '2000'])
sns.despine(ax=brightness_ax)

In [None]:
import plotly.express as px

# Interactive version of the HDR brightness chart: one line per product, with
# the window sizes (the index of the transposed table) on the x axis.
apl_df = data_df.T
fig = px.line(
    apl_df,
    x=apl_df.index,
    y=apl_df.columns,
    title='HDR Brightness',
    labels={'index': 'APL', 'value': 'Brightness'},
    line_shape='linear',
    color_discrete_sequence=px.colors.qualitative.Vivid,
)
fig.update_layout(
    width=1000,
    height=500,
    template='plotly_white',
    margin=dict(l=10, r=10, b=10, t=40),
)
# NOTE(review): the y axis is pinned to 0-2000, so any value above 2000 falls
# outside the visible range.
fig.update_yaxes(
    range=[0, 2000],
    tickvals=[200, 1000, 1500, 2000],
    ticktext=['200', '1000', '1500', '2000'],
)
fig.show()

# Other template names that can be passed to update_layout(template=...):
# 'plotly_dark', 'ggplot2', 'seaborn', 'simple_white', 'plotly', 'plotly_white'

---

In [None]:
# Show the distinct values of the "header" column in the scraped measurements,
# i.e. the names that can be used to filter measurement_df.
set(measurement_df.header)

- colab에서 파일 받기

In [None]:
# Pack the results folder into a single archive, then hand it to
# google.colab's files.download (Colab only).
!zip -r /content/results_rtings.zip /content/results/
files.download('/content/results_rtings.zip')

---