## 구문 분석

1. spaCy
2. NLTK

In [1]:
!pip install spacy

Collecting spacy
  Downloading spacy-3.7.0-cp39-cp39-win_amd64.whl (12.1 MB)
     --------------------------------------- 12.1/12.1 MB 12.6 MB/s eta 0:00:00
Collecting thinc<8.3.0,>=8.1.8
  Downloading thinc-8.2.1-cp39-cp39-win_amd64.whl (1.5 MB)
     ---------------------------------------- 1.5/1.5 MB 23.5 MB/s eta 0:00:00
Collecting spacy-loggers<2.0.0,>=1.0.0
  Downloading spacy_loggers-1.0.5-py3-none-any.whl (22 kB)
Collecting pathy>=0.10.0
  Downloading pathy-0.10.2-py3-none-any.whl (48 kB)
     ---------------------------------------- 48.9/48.9 kB ? eta 0:00:00
Collecting spacy-legacy<3.1.0,>=3.0.11
  Downloading spacy_legacy-3.0.12-py2.py3-none-any.whl (29 kB)
Collecting preshed<3.1.0,>=3.0.2
  Downloading preshed-3.0.9-cp39-cp39-win_amd64.whl (122 kB)
     -------------------------------------- 122.7/122.7 kB 7.0 MB/s eta 0:00:00
Collecting catalogue<2.1.0,>=2.0.6
  Downloading catalogue-2.0.10-py3-none-any.whl (17 kB)
Collecting wasabi<1.2.0,>=0.9.1
  Downloading wasabi-1.1.2-

In [2]:
import spacy

In [4]:
!python -m spacy download ko_core_news_sm

Collecting ko-core-news-sm==3.7.0
  Downloading https://github.com/explosion/spacy-models/releases/download/ko_core_news_sm-3.7.0/ko_core_news_sm-3.7.0-py3-none-any.whl (14.7 MB)
     --------------------------------------- 14.7/14.7 MB 12.6 MB/s eta 0:00:00
Installing collected packages: ko-core-news-sm
Successfully installed ko-core-news-sm-3.7.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('ko_core_news_sm')


In [5]:
# Load the Korean model
nlp = spacy.load("ko_core_news_sm")

In [6]:
# Run the loaded pipeline on a short Korean example sentence.
doc = nlp("이것은 한국어로 된 문장입니다.")

In [7]:
# Print one line per token: its text, its dependency label, and the text of its head.
for token in doc:
    print(
        f"token : {token.text} || token.dep_ : {token.dep_} || token.head.text : {token.head.text}"
    )

token : 이것은 || token.dep_ : dislocated || token.head.text : 문장입니다
token : 한국어로 || token.dep_ : nsubj || token.head.text : 된
token : 된 || token.dep_ : ccomp || token.head.text : 문장입니다
token : 문장입니다 || token.dep_ : ROOT || token.head.text : 문장입니다
token : . || token.dep_ : punct || token.head.text : 문장입니다


In [8]:
from spacy import displacy

In [10]:
# Visualize the dependency parse of the example sentence in the notebook.
displacy.render(doc, style="dep", jupyter=True)

In [11]:
import pandas as pd

In [12]:
# Read the training ratings CSV; the path is relative to the working directory.
train_df = pd.read_csv('./data/ratings_train.csv')

In [13]:
# Pick the review text stored under row label 100 of the 'document' column
# with a single .loc lookup, then display it.
sample_data = train_df.loc[100, 'document']
sample_data

'신카이 마코토의 작화와,미유와 하나카나가 연기를 잘해줘서 더대박이였다.'

In [14]:
# Parse the sample review. This rebinds `doc`, replacing the parse of the
# earlier example sentence.
doc = nlp(sample_data)

In [15]:
# Visualize the dependency parse of the sample review in the notebook.
displacy.render(doc, style="dep", jupyter=True)

## 사용자 사전을 추가하여 구문 분석

In [36]:
import konlpy
import nltk
from ckonlpy.tag import Twitter

In [46]:
# Create the ckonlpy tagger (imported as `Twitter`); it is bound to the name `okt`.
okt = Twitter()

In [47]:
# Register the two names that occur in the sample review as nouns in the
# tagger's user dictionary.
for custom_noun in ("미유", "하나카나"):
    okt.add_dictionary(custom_noun, 'Noun')

In [48]:
# POS-tag the sample review; the result is displayed as a list of (token, tag) pairs.
words = okt.pos(sample_data)
words

[('신카이', 'Noun'),
 ('마코토', 'Noun'),
 ('의', 'Josa'),
 ('작화', 'Noun'),
 ('와', 'Josa'),
 (',', 'Punctuation'),
 ('미유', 'Noun'),
 ('와', 'Josa'),
 ('하나카나', 'Noun'),
 ('가', 'Josa'),
 ('연기', 'Noun'),
 ('를', 'Josa'),
 ('잘', 'VerbPrefix'),
 ('해줘서', 'Verb'),
 ('더', 'Noun'),
 ('대박', 'Noun'),
 ('이', 'Josa'),
 ('였다', 'Verb'),
 ('.', 'Punctuation')]

In [49]:
# Disabled alternative: tokenize with okt.morphs instead of okt.pos.
# NOTE(review): left commented out because the chunk parser below is fed the
# (token, tag) pairs produced by okt.pos; consider deleting this cell.
# words = okt.morphs(sample_data)
# words

In [50]:
# Chunk grammar for nltk.RegexpParser: each rule groups consecutive tokens whose
# POS tags match the pattern inside {...} under the label left of the colon.
# NOTE(review): the third label is spelled "Ap" while the others are upper-case;
# kept as-is because downstream code may match on it.
grammar = """
NP: {<N.*>*<Suffix>?} # Noun phrase
VP: {<V.*>*}          # Verb phrase
Ap: {<A.*>*}          # Adjective phrase
"""

parser = nltk.RegexpParser(grammar=grammar)
chunks = parser.parse(words)
# pprint() writes the tree itself and returns None, so wrapping it in print()
# emitted a stray "None" after the tree. Call it directly instead.
chunks.pprint()

(S
  (NP 신카이/Noun 마코토/Noun)
  의/Josa
  (NP 작화/Noun)
  와/Josa
  ,/Punctuation
  (NP 미유/Noun)
  와/Josa
  (NP 하나카나/Noun)
  가/Josa
  (NP 연기/Noun)
  를/Josa
  (VP 잘/VerbPrefix 해줘서/Verb)
  (NP 더/Noun 대박/Noun)
  이/Josa
  (VP 였다/Verb)
  ./Punctuation)
None


In [51]:
# Draw the chunk tree graphically.
# NOTE(review): presumably opens a separate GUI window rather than rendering
# inline — confirm it works in the target environment.
chunks.draw()