#### WISDOMAIN 데이터 기반 특허 분석 실습

##### 1. 데이터 준비

In [2]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

# Import statements required for Plotly 
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.tools as tls

# Import and suppress warnings
import warnings
warnings.filterwarnings('ignore')

In [196]:
# WISDOMAIN 원본 데이터 확인
rawdata = pd.read_csv('../data/rawdata_q2_2.csv')
rawdata.head(1)

Unnamed: 0,번호,명칭,요약,등록일,출원인,출원인국가,독립 청구항수,전체 청구항수,명칭(원문),요약(원문),...,통상실시권자,발명자국가,공통특허분류,미국특허분류,소유권이전여부,소유권최종양도일,현재소유자,권리 현황,최종 상태,권리만료예상일
0,US9607236,Method and apparatus for providing loan verifi...,Some aspects of the invention relate to a mobi...,2017.03.28,"Blinker, Inc.",US,5.0,30,,,...,,US|US|US,"G06V20/63, G06Q40/025, G06T11/60, G06V10/267, ...",001/001,N,,BLINKER INC,Alive,Alive,2034.06.27


In [197]:
# 컬럼명 확인
rawdata.columns

Index(['번호', '명칭', '요약', '등록일', '출원인', '출원인국가', '독립 청구항수', '전체 청구항수', '명칭(원문)',
       '요약(원문)', '국제특허분류', '자국인용특허', '외국인용특허', '자국피인용특허', '자국인용횟수', '자국피인용횟수',
       '특허평가등급', '발명자수', '발명자', '출원인대표명', '출원인주소', '전용실시권자', '통상실시권자', '발명자국가',
       '공통특허분류', '미국특허분류', '소유권이전여부', '소유권최종양도일', '현재소유자', '권리 현황', '최종 상태',
       '권리만료예상일'],
      dtype='object')

In [198]:
# 필요한 컬럼들 정의
target_cols = ['번호','명칭','요약','등록일','출원인','독립 청구항수','전체 청구항수',
               '국제특허분류','자국인용특허', '외국인용특허', '자국피인용특허', '자국인용횟수', 
               '자국피인용횟수','특허평가등급', '발명자수','발명자국가','출원인대표명']
dataset = rawdata[target_cols]
dataset.head(1)

Unnamed: 0,번호,명칭,요약,등록일,출원인,독립 청구항수,전체 청구항수,국제특허분류,자국인용특허,외국인용특허,자국피인용특허,자국인용횟수,자국피인용횟수,특허평가등급,발명자수,발명자국가,출원인대표명
0,US9607236,Method and apparatus for providing loan verifi...,Some aspects of the invention relate to a mobi...,2017.03.28,"Blinker, Inc.",5.0,30,"G06K-009/00, G06K-009/32, G06T-011/60, G06Q-04...","US3544771,US3550084,US5227803,US5579008,US5579...","CN103985256,CN204303027,HU03002998,JP10134219,...","US9888235,US9990726,US10445873,US10515285,US10...",593.0,17.0,A+,3,US|US|US,BLINKER INC


In [199]:
def change_ipc(row):
    ipcs = [ipc.split('/')[0].split('-') for ipc in [item.strip() for item in row]]
    ipcs_revised = list(set([f"{each_ipc[0]}{each_ipc[1].replace('0','')}" for each_ipc in ipcs]))
    return ipcs_revised

In [200]:
def count_len(row):
    if type(row) == list:
        return len(row)
    else:
        return 0
    
def remove_duplicated_list(row):
    after_row = list(set(row))
    return after_row

In [201]:
dataset['국제특허분류'] = dataset['국제특허분류'].str.split(',')
dataset['외국인용횟수'] = dataset['외국인용특허'].str.split(',')
dataset['발명자국가'] = dataset['발명자국가'].str.split('|')

In [202]:
dataset['국제특허분류'] = dataset['국제특허분류'].apply(change_ipc)
dataset['외국인용횟수'] = dataset['외국인용특허'].apply(count_len)
dataset['발명자국가'] = dataset['발명자국가'].apply(remove_duplicated_list)

In [203]:
dataset.head(1)

Unnamed: 0,번호,명칭,요약,등록일,출원인,독립 청구항수,전체 청구항수,국제특허분류,자국인용특허,외국인용특허,자국피인용특허,자국인용횟수,자국피인용횟수,특허평가등급,발명자수,발명자국가,출원인대표명,외국인용횟수
0,US9607236,Method and apparatus for providing loan verifi...,Some aspects of the invention relate to a mobi...,2017.03.28,"Blinker, Inc.",5.0,30,"[G06K9, G06Q4, G06T11]","US3544771,US3550084,US5227803,US5579008,US5579...","CN103985256,CN204303027,HU03002998,JP10134219,...","US9888235,US9990726,US10445873,US10515285,US10...",593.0,17.0,A+,3,[US],BLINKER INC,0


In [74]:
# 한국, 미국 데이터 나누기
dataset_kr = dataset[dataset['번호'].str.contains('KR')]
dataset_us = dataset[dataset['번호'].str.contains('US')]

In [75]:
dataset_kr.iloc[0]['국제특허분류']

['G06Q4', 'G06Q2', 'G06Q3']

In [76]:
dataset_us.iloc[0]

번호                                                 US9607236
명칭         Method and apparatus for providing loan verifi...
요약         Some aspects of the invention relate to a mobi...
등록일                                               2017.03.28
출원인                                            Blinker, Inc.
독립 청구항수                                                    5
전체 청구항수                                                   30
국제특허분류                                [G06K9, G06Q4, G06T11]
자국인용특허     US3544771,US3550084,US5227803,US5579008,US5579...
외국인용특허     CN103985256,CN204303027,HU03002998,JP10134219,...
자국피인용특허    US9888235,US9990726,US10445873,US10515285,US10...
자국인용횟수                                                   593
자국피인용횟수                                                   17
특허평가등급                                                    A+
발명자수                                                       3
출원인대표명                                           BLINKER INC
Name: 0, dtype: object