In [32]:
import json
import pandas
import numpy as np
from collections import namedtuple

# Fiscal years covered by the analysis, as the strings embedded in the data paths.
# For a quick run this can be narrowed, e.g. years = ("2019", "2020").
years = ("2015", "2016", "2017", "2018", "2019", "2020", "2021")
# One fourth-quarter JSON path per entry of `years`, in the same order.
quarter_path = [f"data/json/{year}_fourth_quarter_정보.json" for year in years]

# Per-company result: revenue, operating profit and operating profit margin (%).
res_tuple = namedtuple('corp', ["revenue", "operation_profit", "opm"])
def analyze(corp):
    """
    Compute revenue, operating profit and operating profit margin (OPM).

    Scans the account entries for "매출액" (revenue) and "영업이익"
    (operating profit), parses their ``thstrm_amount`` strings after removing
    thousands separators, and derives
    ``opm = operation_profit / revenue * 100``.
    If an account name occurs more than once, the last occurrence wins.

    Args:
        corp: Iterable of mappings. Every entry must have "account_nm"; the
            two accounts named above must also have "thstrm_amount".

    Returns:
        ``res_tuple(revenue, operation_profit, opm)`` on success, or ``False``
        when the entries cannot be analyzed: a missing key, a non-numeric
        amount, malformed input, or a revenue of zero (including the case
        where no revenue entry exists).
    """
    revenue = 0
    operation_profit = 0
    try:
        for sector in corp:
            account_nm = sector["account_nm"]
            if account_nm == "매출액":
                revenue = int(sector["thstrm_amount"].replace(",", ""))
            elif account_nm == "영업이익":
                operation_profit = int(sector["thstrm_amount"].replace(",", ""))
        opm = operation_profit / revenue * 100
    except (KeyError, TypeError, AttributeError, ValueError, ZeroDivisionError):
        # Only data problems mean "skip this company". A bare `except:` would
        # also swallow KeyboardInterrupt/SystemExit and hide programming errors.
        return False
    return res_tuple(revenue, operation_profit, opm)

# One analyzed company: its name plus the value returned by analyze() for it.
analyzed_data_tuple = namedtuple('analyzed_data', ("name", "data_packs"))
def getAnalyzedData(path):
    """
    Load one JSON file and analyze every company in it.

    Args:
        path: Path of a UTF-8 encoded JSON file whose top level is an object
            mapping a company name to the account list handed to ``analyze``.

    Returns:
        A list of ``analyzed_data_tuple(name, data_packs)`` in the order the
        companies appear in the file, omitting every company for which
        ``analyze`` returned ``False``.
    """
    # Only the parse needs the file handle; close it before the analysis loop.
    with open(path, 'r', encoding="UTF-8") as file:
        data = json.load(file)

    analyzed_datas = []
    for name, accounts in data.items():
        analyzed_data = analyze(accounts)
        # analyze() reports failure with the sentinel False, so test identity
        # rather than equality.
        if analyzed_data is not False:
            analyzed_datas.append(analyzed_data_tuple(name, analyzed_data))
    return analyzed_datas

# Build one table of operating profit margins: one row per company, one
# column per year. Companies missing from a year get NaN (outer join).
corp_df = None

for year in years:
    # Use a dedicated name so the per-year path does not overwrite the
    # `quarter_path` list defined earlier in this cell.
    year_path = "data/json/" + year + "_fourth_quarter_정보.json"
    datas = getAnalyzedData(year_path)
    corporations = [data.name for data in datas]
    opms = [data.data_packs.opm for data in datas]
    # dtype=float keeps the column numeric even when a year has no companies.
    temp_df = pandas.DataFrame({year: opms}, index=corporations, dtype=float)
    # Detect the first year explicitly. Testing `corp_df.empty` would also be
    # true after a year with no usable companies and would then silently
    # discard that year's column.
    if corp_df is None:
        corp_df = temp_df
    else:
        corp_df = pandas.merge(corp_df, temp_df, left_index=True, right_index=True, how='outer')

if corp_df is None:
    # `years` was empty: fall back to an empty frame.
    corp_df = pandas.DataFrame()

# Screening thresholds for the yearly operating profit margin (percent).
OPM_MIN = 1
OPM_MAX = 30

opm_by_year = corp_df[list(years)]

# A company is rejected if any year's margin lies outside [OPM_MIN, OPM_MAX].
# Comparisons against NaN are False, so a missing year alone does not reject.
out_of_range = ((opm_by_year > OPM_MAX) | (opm_by_year < OPM_MIN)).any(axis=1)

# The 2019-2021 values must all be present.
missing_recent = corp_df.loc[:, "2019":"2021"].isnull().any(axis=1)

# The margin must not have fallen in any step from 2018 to 2021. A missing
# 2018 value makes `2019 < 2018` False and therefore does not reject.
declined = (
    (corp_df["2021"] < corp_df["2020"])
    | (corp_df["2020"] < corp_df["2019"])
    | (corp_df["2019"] < corp_df["2018"])
)

# Keep the survivors as an independent frame so the "Gap" column can be added
# without a SettingWithCopyWarning.
corp_df = corp_df.loc[~(out_of_range | missing_recent | declined)].copy()

# Rank the remaining companies by the latest year-over-year improvement.
corp_df["Gap"] = corp_df["2021"] - corp_df["2020"]
corp_df = corp_df.sort_values(by="Gap", ascending=False)
corp_df.to_excel("df.xlsx")

In [33]:
corp_len = len(corp_df)
copy_corp_df = corp_df.copy()

# Weight each row by its position in corp_df: the first row keeps its values
# (weight 1.0) and the weight falls linearly to 1 / corp_len for the last row.
rank_weights = (corp_len - np.arange(corp_len)) / corp_len
recent_years = ["2019", "2020", "2021"]

# Assign through the frame itself, whole columns at once. Writing via
# `copy_corp_df.iloc[i][...] = ...` only touches a temporary row object, so
# the weighted values would never reach copy_corp_df.
copy_corp_df[recent_years] = corp_df[recent_years].mul(rank_weights, axis=0)

copy_corp_df.to_excel("applyed_df.xlsx")

In [14]:
# Plotly provides the figure built in the plotting cell (via `go`).
# NOTE(review): `px` is not referenced in any visible cell — confirm before removing.
import plotly.express as px
import plotly.graph_objects as go
# Route pandas' own .plot() calls through plotly instead of the default backend.
pandas.options.plotting.backend = "plotly"





In [21]:
fig = go.Figure()

# Plot only the yearly margins: "Gap" is a derived difference between two
# years, not a point on the timeline.
year_columns = [column for column in corp_df.columns if column != "Gap"]

# One line per company that survived the screening.
for idx in corp_df.index:
    item = corp_df.loc[idx, year_columns]
    # Pass the year labels as x; without x, plotly numbers the points 0..n-1.
    fig.add_trace(go.Scatter(x=year_columns, y=item, name=idx))

fig.update_layout(
    xaxis_title="Year",
    yaxis_title="Operating profit margin (%)",
    xaxis_type="category",  # keep the year strings as discrete labels
)
fig.show()