## Import libraries

In [1]:
import pandas as pd
import numpy as np

import ast

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, roc_auc_score, roc_curve

from sklearn.linear_model import LogisticRegression

import seaborn as sns
import matplotlib.pyplot as plt

plt.rcParams["figure.figsize"] = (16, 12)

In [2]:
import emoji

## Raw data reading

In [3]:
media_info = pd.read_csv('dataset/media_info.csv')
columns_info = ['id', 'media_type', 'comment_count', 'like_count', 'caption', 'product_type']

media_info = media_info[columns_info]
print(media_info.shape)
media_info = pd.concat([media_info[['id', 'media_type', 'comment_count', 'like_count', 'product_type']],
                       pd.DataFrame(media_info['caption'].apply(ast.literal_eval).tolist())[['text', 'created_at']]],
                       axis=1)
media_info.text = media_info.text.apply(lambda x: x.replace(u"\t", ' ').replace(u"\n", ' '))
media_info.to_excel('excel/media_info.xlsx', index=False)
media_info

(547, 6)


Unnamed: 0,id,media_type,comment_count,like_count,product_type,text,created_at
0,2552627566372576640_6323132732,2,0,151,feed,üíâYessenov University-–¥–µ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç “õ—ã–∑–º–µ—Ç–∫–µ—Ä–ª...,1618740859
1,2552302050930317520_6323132732,8,0,167,carousel_container,üìåYessenov University 14 —Å”ô—É—ñ—Ä –∫“Ø–Ω—ñ ¬´”ò–ª–µ—É–º–µ—Ç—Ç—ñ–∫...,1618478195
2,2551856921623855106_6323132732,2,2,234,igtv,17 –Ω–∞—É—Ä—ã–∑ –∫“Ø–Ω—ñ Yessenov University —Å—Ç—É–¥–µ–Ω—Ç—Ç–µ—Ä—ñ...,1618478607
3,2551051845710673418_6323132732,8,0,127,carousel_container,üìå13 —Å”ô—É—ñ—Ä –∫“Ø–Ω—ñ Yessenov University “±–π—ã–º–¥–∞—Å—Ç—ã—Ä—É...,1618329101
4,2551045694403308072_6323132732,1,0,79,feed,üìåYessenov University –ë—ñ–ª—ñ–º –±–µ—Ä—É –º–µ–∫—Ç–µ–±—ñ–Ω—ñ“£ “±–π—ã...,1618328367
...,...,...,...,...,...,...,...
542,2084913117751324217_6323132732,2,2,553,igtv,#–æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ #–¥–µ—Ç–∏ #—à–∫–æ–ª–∞ #–Ω–∞—É–∫–∞ #–æ–±—É—á–µ–Ω–∏–µ #–ø...,1562841855
543,2084744888537924606_6323132732,1,17,321,feed,“ö“±—Ä–º–µ—Ç—Ç—ñ —Ç–∞–ª–∞–ø–∫–µ—Ä–ª–µ—Ä! –ê—Ä–Ω–∞–π—ã —Å—ñ–∑–¥–µ—Ä “Ø—à—ñ–Ω! “ö–∞–±—ã...,1562740979
544,2084231514720862896_6323132732,8,0,365,carousel_container,"Yessenov University-–¥–µ ""–ù“±—Ä –û—Ç–∞–Ω"" –ø–∞—Ä—Ç–∏—è—Å—ã –ú–∞...",1562679781
545,2081770835984285444_6323132732,1,0,171,feed,Yessen–æv University –±–∞—Ä—à–∞ “õ–∞–∑–∞“õ—Å—Ç–∞–Ω–¥—ã“õ—Ç–∞—Ä–¥—ã –µ–ª...,1562386444


In [4]:
import re
def get_hashtags(text):
    return ' '.join(re.findall(r"#(\w+)", str(text)))

media_info['hashtags'] = media_info.text.apply(get_hashtags)
media_info[['id', 'hashtags']].to_excel('excel/instagram_hashtags.xlsx', index=False)

In [4]:
media_info_comments = pd.read_csv('dataset/media_info_comments.csv')[['media_id', 'user_id', 'text',
                                                                      'type', 'created_at', 'comment_like_count']]
media_info_comments.comment_like_count = media_info_comments.comment_like_count.fillna(0)
media_info_comments.text = media_info_comments.text.apply(lambda x: x.replace(u"\t", ' ').replace(u"\n", ' '))
media_info_comments.to_excel('excel/media_info_comments.xlsx', index=False)
media_info_comments

Unnamed: 0,media_id,user_id,text,type,created_at,comment_like_count
0,2551856921623855106_6323132732,5653049971,üëèüëèüëèüëè,0,1618434226,0.0
1,2551856921623855106_6323132732,8004212870,üòçüòçüòçüòç,0,1618463343,0.0
2,2550773232809730389_6323132732,21717688850,üôåü§≤ü§≤ü§≤,0,1618300081,0.0
3,2550750258283139041_6323132732,44136745886,"–ë–∞—É—ã—Ä—ã–º,–±–∏—ñ–∫—Ç–µ—Ä–¥–µ–Ω –∫”©—Ä—ñ–Ω–µ –±–µ—Ä !!! –ë–∞“õ—ã—Ç—Ç—ã –±–æ–ª!!!",0,1618314233,0.0
4,2550428231701540075_6323132732,7159856619,üëè,0,1618254886,0.0
...,...,...,...,...,...,...
7621,2081356494843913014_6323132732,6742136658,@rakhmet_92 —Å–µ–Ω –±–∞ –∞–∫ –∫–∏–∏–º–¥–µ–≥–∏ @qazaq_qylyq @...,0,1564052043,0.0
7622,2081356494843913014_6323132732,1538214058,@raysoul28 –æ–ª –µ–º–µ—Å,2,1564052248,0.0
7623,2081356494843913014_6323132732,465619292,"@raysoul28 –∫”©—Ä–º–µ–≥–µ–ªi –∫”©–ø –±–æ–ª–¥—ã, —Ç“Ø—Ä–¥ “±–º—ã—Ç—ã–ø –∫–∞...",2,1564052313,0.0
7624,2081356494843913014_6323132732,6742136658,@rakhmet_92 —Å–æ —Å–æ –∫–æ—Ä–º–µ–≥–µ–ª–∏ –∫–æ–ø –±–æ–ª–¥ –∞–∏—Ç—à–∏—à ?,2,1564052718,0.0


## Reading translated Data

In [5]:
media_i = pd.read_csv('translated/media_info.txt', sep='\t', encoding='utf8').drop('product_type', axis=1)
media_i

Unnamed: 0,id,media_type,comment_count,media_like_count,media_text,media_created_at
0,2552627566372576640_6323132732,2.0,0,151,üíâ–ï—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø—Ä–æ–≤–æ–¥–∏—Ç –¥–æ–±—Ä–æ–≤–æ–ª—å–Ω—É—é ...,1.618741e+09
1,2552302050930317520_6323132732,8.0,0,167,"üìå–ï—Å—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç 14 –∞–ø—Ä–µ–ª—è, –æ—Ä–≥–∞–Ω–∏–∑–æ–≤–∞...",1.618478e+09
2,2551856921623855106_6323132732,2.0,2,234,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –ï—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,1.618479e+09
3,2551051845710673418_6323132732,8.0,0,127,üìå 13 –∞–ø—Ä–µ–ª—è –≤ –ï—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –æ...,1.618329e+09
4,2551045694403308072_6323132732,1.0,0,79,üìå –ü–µ–¥–∞–≥–æ–≥–∏—á–µ—Å–∫–∞—è —à–∫–æ–ª–∞ –ï—Å—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ...,1.618328e+09
...,...,...,...,...,...,...
543,2084913117751324217_6323132732,2.0,2,553,# –æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ # –¥–µ—Ç–∏ # —à–∫–æ–ª–∞ # –Ω–∞—É–∫–∞ # –æ–±—Ä–∞–∑–æ–≤...,1.562842e+09
544,2084744888537924606_6323132732,1.0,17,321,–£–≤–∞–∂–∞–µ–º—ã–µ —Å–æ–∏—Å–∫–∞—Ç–µ–ª–∏! –°–ø–µ—Ü–∏–∞–ª—å–Ω–æ –¥–ª—è –í–∞—Å! –ü—Ä–∏–µ...,1.562741e+09
545,2084231514720862896_6323132732,8.0,0,365,–í –ï—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ XXXI –≤–Ω–µ–æ—á–µ—Ä–µ...,1.562680e+09
546,2081770835984285444_6323132732,1.0,0,171,–ï—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –∫–∞–∑–∞—Ö—Å...,1.562386e+09


In [6]:
media_c = pd.read_csv('translated/media_comments.txt', sep='\t', encoding='utf8')
media_c

Unnamed: 0,media_id,user_id,comment_text,comment_type,comment_created_at,comment_like_count
0,2551856921623855106_6323132732,5653049971,üëèüëèüëèüëè,0,1618434226,0
1,2551856921623855106_6323132732,8004212870,üòçüòçüòçüòç,0,1618463343,0
2,2550773232809730389_6323132732,21717688850,üôåü§≤ü§≤ü§≤,0,1618300081,0
3,2550750258283139041_6323132732,44136745886,"–ë—Ä–∞—Ç, –±—É–¥—å –Ω–∞ –≤–∏–¥—É —Å –≤—ã—Å–æ—Ç—ã !!! –ë—É–¥—å —Å—á–∞—Å—Ç–ª–∏–≤!!!",0,1618314233,0
4,2550428231701540075_6323132732,7159856619,üëè,0,1618254886,0
...,...,...,...,...,...,...
7621,2081356494843913014_6323132732,6742136658,@ rakhmet_92 —Ç—ã @qazaq_qylyq @ aiganym__12 –≤ –±...,0,1564052043,0
7622,2081356494843913014_6323132732,1538214058,@ rayoul28 –æ–Ω –Ω–µ,2,1564052248,0
7623,2081356494843913014_6323132732,465619292,"–£ @ rayoul28 –±—ã–ª–æ –º–Ω–æ–≥–æ –≤—ã—Å—Ç–∞–≤–æ–∫, —Ç—ã –∑–∞–±—ã–ª –ø—Ä–æ...",2,1564052313,0
7624,2081356494843913014_6323132732,6742136658,@ rakhmet_92 —Ç–∞–∫ —á—Ç–æ –∫–æ—Ä–º–µ–≥–µ–ª–∏ –∫–æ–ø –∂–∏—Ä–Ω—ã–π –∞–π—Ç–∏—à?,2,1564052718,0


In [7]:
comment_df = pd.concat([media_i.set_index('id').loc[media_c.media_id],
                        media_c.set_index('media_id')], axis=1)
comment_df['media_id'] = comment_df.index
comment_df = comment_df[['media_id', 'media_type', 'comment_count', 'media_like_count',
                        'media_text', 'media_created_at', 'user_id', 'comment_text',
                        'comment_type', 'comment_created_at', 'comment_like_count', ]]

comment_df.media_created_at = comment_df.media_created_at.fillna(0).astype('int64')
comment_df.index = range(comment_df.shape[0])
comment_df

Unnamed: 0,media_id,media_type,comment_count,media_like_count,media_text,media_created_at,user_id,comment_text,comment_type,comment_created_at,comment_like_count
0,2551856921623855106_6323132732,2.0,2,234,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –ï—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,1618478607,5653049971,üëèüëèüëèüëè,0,1618434226,0
1,2551856921623855106_6323132732,2.0,2,234,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –ï—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,1618478607,8004212870,üòçüòçüòçüòç,0,1618463343,0
2,2550773232809730389_6323132732,1.0,1,205,–ï—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...,1618295887,21717688850,üôåü§≤ü§≤ü§≤,0,1618300081,0
3,2550750258283139041_6323132732,8.0,1,205,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –ï—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...,1618293148,44136745886,"–ë—Ä–∞—Ç, –±—É–¥—å –Ω–∞ –≤–∏–¥—É —Å –≤—ã—Å–æ—Ç—ã !!! –ë—É–¥—å —Å—á–∞—Å—Ç–ª–∏–≤!!!",0,1618314233,0
4,2550428231701540075_6323132732,8.0,4,332,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...,1618254760,7159856619,üëè,0,1618254886,0
...,...,...,...,...,...,...,...,...,...,...,...
7621,2081356494843913014_6323132732,2.0,63,1335,–ú–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,6742136658,@ rakhmet_92 —Ç—ã @qazaq_qylyq @ aiganym__12 –≤ –±...,0,1564052043,0
7622,2081356494843913014_6323132732,2.0,63,1335,–ú–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,1538214058,@ rayoul28 –æ–Ω –Ω–µ,2,1564052248,0
7623,2081356494843913014_6323132732,2.0,63,1335,–ú–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,465619292,"–£ @ rayoul28 –±—ã–ª–æ –º–Ω–æ–≥–æ –≤—ã—Å—Ç–∞–≤–æ–∫, —Ç—ã –∑–∞–±—ã–ª –ø—Ä–æ...",2,1564052313,0
7624,2081356494843913014_6323132732,2.0,63,1335,–ú–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,6742136658,@ rakhmet_92 —Ç–∞–∫ —á—Ç–æ –∫–æ—Ä–º–µ–≥–µ–ª–∏ –∫–æ–ø –∂–∏—Ä–Ω—ã–π –∞–π—Ç–∏—à?,2,1564052718,0


## Preprocessing

In [8]:
import re
import emoji

def cleaning_data(text):
#     if text.split(' ')[0][0] == "@":
#         text = ' '.join(text.split(" ")[1:])
    text = re.sub(r'^https?:\/\/.*[\r\n]*', ' ', text)
    text = str(text).replace('(<br/>)', '')
    text = text.replace('(<a).*(>).*(</a>)', '')
    text = text.replace('(&amp)', '')
    text = text.replace('(&gt)', '')
    text = text.replace('(&lt)', '')
    text = text.replace('(\xa0)', ' ')
    text = text.replace('-', ' ')
    text = text.replace('(', ' ')
    text = text.replace(')', ' ')
    # text = re.sub('[^–ê-–Ø,–∞-—è,”ò,–Ü,“¢,“í,“Æ,“∞,“ö,”®,“∫,”ô,—ñ,…ô,“£,“ì,“Ø,“±,“õ,”©,“ª]', ' ', str(text).replace('-', ''))
    text = re.sub('_', '', text)
    text = re.sub('\s+', ' ', text)
    text = emoji.emojize(emoji.demojize(text).replace('::', ': :'))
    return str(text).lower().strip()

In [9]:
emoji.demojize('üòçüòçüòçüòç')

':smiling_face_with_heart-eyes::smiling_face_with_heart-eyes::smiling_face_with_heart-eyes::smiling_face_with_heart-eyes:'

In [10]:
emoji.emojize(':smiling_face_with_heart-eyes::smiling_face_with_heart-eyes::smiling_face_with_heart-eyes::smiling_face_with_heart-eyes:'.replace('::', ': :'))

'üòç üòç üòç üòç'

In [11]:
comment_df['media_text'] = comment_df.media_text.astype(str).apply(cleaning_data)
comment_df['comment_text'] = comment_df.comment_text.astype(str).apply(cleaning_data)
comment_df.head()

Unnamed: 0,media_id,media_type,comment_count,media_like_count,media_text,media_created_at,user_id,comment_text,comment_type,comment_created_at,comment_like_count
0,2551856921623855106_6323132732,2.0,2,234,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,1618478607,5653049971,üëè üëè üëè üëè,0,1618434226,0
1,2551856921623855106_6323132732,2.0,2,234,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,1618478607,8004212870,üòç üòç üòç üòç,0,1618463343,0
2,2550773232809730389_6323132732,1.0,1,205,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...,1618295887,21717688850,üôå ü§≤ ü§≤ ü§≤,0,1618300081,0
3,2550750258283139041_6323132732,8.0,1,205,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...,1618293148,44136745886,"–±—Ä–∞—Ç, –±—É–¥—å –Ω–∞ –≤–∏–¥—É —Å –≤—ã—Å–æ—Ç—ã !!! –±—É–¥—å —Å—á–∞—Å—Ç–ª–∏–≤!!!",0,1618314233,0
4,2550428231701540075_6323132732,8.0,4,332,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...,1618254760,7159856619,üëè,0,1618254886,0


In [12]:
filtired_df = comment_df[comment_df['comment_text'].apply(lambda x: True if len(x.split(' ')) > 0 else False)]
filtired_df

Unnamed: 0,media_id,media_type,comment_count,media_like_count,media_text,media_created_at,user_id,comment_text,comment_type,comment_created_at,comment_like_count
0,2551856921623855106_6323132732,2.0,2,234,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,1618478607,5653049971,üëè üëè üëè üëè,0,1618434226,0
1,2551856921623855106_6323132732,2.0,2,234,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,1618478607,8004212870,üòç üòç üòç üòç,0,1618463343,0
2,2550773232809730389_6323132732,1.0,1,205,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...,1618295887,21717688850,üôå ü§≤ ü§≤ ü§≤,0,1618300081,0
3,2550750258283139041_6323132732,8.0,1,205,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...,1618293148,44136745886,"–±—Ä–∞—Ç, –±—É–¥—å –Ω–∞ –≤–∏–¥—É —Å –≤—ã—Å–æ—Ç—ã !!! –±—É–¥—å —Å—á–∞—Å—Ç–ª–∏–≤!!!",0,1618314233,0
4,2550428231701540075_6323132732,8.0,4,332,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...,1618254760,7159856619,üëè,0,1618254886,0
...,...,...,...,...,...,...,...,...,...,...,...
7621,2081356494843913014_6323132732,2.0,63,1335,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,6742136658,@ rakhmet92 —Ç—ã @qazaqqylyq @ aiganym12 –≤ –±–µ–ª–æ–º,0,1564052043,0
7622,2081356494843913014_6323132732,2.0,63,1335,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,1538214058,@ rayoul28 –æ–Ω –Ω–µ,2,1564052248,0
7623,2081356494843913014_6323132732,2.0,63,1335,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,465619292,"—É @ rayoul28 –±—ã–ª–æ –º–Ω–æ–≥–æ –≤—ã—Å—Ç–∞–≤–æ–∫, —Ç—ã –∑–∞–±—ã–ª –ø—Ä–æ...",2,1564052313,0
7624,2081356494843913014_6323132732,2.0,63,1335,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,6742136658,@ rakhmet92 —Ç–∞–∫ —á—Ç–æ –∫–æ—Ä–º–µ–≥–µ–ª–∏ –∫–æ–ø –∂–∏—Ä–Ω—ã–π –∞–π—Ç–∏—à?,2,1564052718,0


In [13]:
from datetime import datetime
filtired_df['post_datetime'] = filtired_df.media_created_at.apply(lambda x: datetime.utcfromtimestamp(x).strftime('%d.%m.%Y %H:%M:%S'))
filtired_df['comment_datetime'] = filtired_df.comment_created_at.apply(lambda x: datetime.utcfromtimestamp(x).strftime('%d.%m.%Y %H:%M:%S'))
filtired_df

Unnamed: 0,media_id,media_type,comment_count,media_like_count,media_text,media_created_at,user_id,comment_text,comment_type,comment_created_at,comment_like_count,post_datetime,comment_datetime
0,2551856921623855106_6323132732,2.0,2,234,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,1618478607,5653049971,üëè üëè üëè üëè,0,1618434226,0,15.04.2021 09:23:27,14.04.2021 21:03:46
1,2551856921623855106_6323132732,2.0,2,234,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,1618478607,8004212870,üòç üòç üòç üòç,0,1618463343,0,15.04.2021 09:23:27,15.04.2021 05:09:03
2,2550773232809730389_6323132732,1.0,1,205,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...,1618295887,21717688850,üôå ü§≤ ü§≤ ü§≤,0,1618300081,0,13.04.2021 06:38:07,13.04.2021 07:48:01
3,2550750258283139041_6323132732,8.0,1,205,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...,1618293148,44136745886,"–±—Ä–∞—Ç, –±—É–¥—å –Ω–∞ –≤–∏–¥—É —Å –≤—ã—Å–æ—Ç—ã !!! –±—É–¥—å —Å—á–∞—Å—Ç–ª–∏–≤!!!",0,1618314233,0,13.04.2021 05:52:28,13.04.2021 11:43:53
4,2550428231701540075_6323132732,8.0,4,332,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...,1618254760,7159856619,üëè,0,1618254886,0,12.04.2021 19:12:40,12.04.2021 19:14:46
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7621,2081356494843913014_6323132732,2.0,63,1335,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,6742136658,@ rakhmet92 —Ç—ã @qazaqqylyq @ aiganym12 –≤ –±–µ–ª–æ–º,0,1564052043,0,05.07.2019 14:31:40,25.07.2019 10:54:03
7622,2081356494843913014_6323132732,2.0,63,1335,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,1538214058,@ rayoul28 –æ–Ω –Ω–µ,2,1564052248,0,05.07.2019 14:31:40,25.07.2019 10:57:28
7623,2081356494843913014_6323132732,2.0,63,1335,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,465619292,"—É @ rayoul28 –±—ã–ª–æ –º–Ω–æ–≥–æ –≤—ã—Å—Ç–∞–≤–æ–∫, —Ç—ã –∑–∞–±—ã–ª –ø—Ä–æ...",2,1564052313,0,05.07.2019 14:31:40,25.07.2019 10:58:33
7624,2081356494843913014_6323132732,2.0,63,1335,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,1562337100,6742136658,@ rakhmet92 —Ç–∞–∫ —á—Ç–æ –∫–æ—Ä–º–µ–≥–µ–ª–∏ –∫–æ–ø –∂–∏—Ä–Ω—ã–π –∞–π—Ç–∏—à?,2,1564052718,0,05.07.2019 14:31:40,25.07.2019 11:05:18


In [14]:
filtired_df.to_excel('excel/insta_comments.xlsx')

## Sentimental model classifier

In [15]:
from dostoevsky.tokenization import RegexTokenizer
from dostoevsky.models import FastTextSocialNetworkModel

tokenizer = RegexTokenizer()
model = FastTextSocialNetworkModel(tokenizer=tokenizer)



In [16]:
results = pd.DataFrame(model.predict(filtired_df.comment_text.astype(str)))
results['media_id'] = filtired_df['media_id'].values
results['media_type'] = filtired_df['media_type'].astype('int64').values
results['comment_count'] = filtired_df['comment_count'].values
results['media_like_count'] = filtired_df['media_like_count'].values
results['media_created_at'] = filtired_df['media_created_at'].values
results['post_text'] = filtired_df['media_text'].astype(str).apply(cleaning_data).values

results['comment_text'] = filtired_df['comment_text'].astype(str).apply(cleaning_data).values
results['comment_type'] = filtired_df['comment_type'].values
results['comment_created_at'] = filtired_df['comment_created_at'].values
results['comment_like_count'] = filtired_df['comment_like_count'].values


results = results[['media_id', 'media_type', 'comment_count', 'media_like_count', 'media_created_at',
                   'post_text', 'comment_text', 'comment_type', 'comment_created_at', 'comment_like_count',
                   'negative', 'positive', 'skip', 'neutral', 'speech', ]]
results['negative_prob'] = (results.negative / results.loc[:,
                                                            ['negative', 'positive',
                                                             'skip', 'neutral', 'speech',]].sum(axis=1)) * 100
results['positive_prob'] = (results.positive / results.loc[:,
                                                            ['negative', 'positive',
                                                             'skip', 'neutral', 'speech',]].sum(axis=1)) * 100
results['negative_boolean'] = results.negative_prob > results.positive_prob
results

Unnamed: 0,media_id,media_type,comment_count,media_like_count,media_created_at,post_text,comment_text,comment_type,comment_created_at,comment_like_count,negative,positive,skip,neutral,speech,negative_prob,positive_prob,negative_boolean
0,2551856921623855106_6323132732,2,2,234,1618478607,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,üëè üëè üëè üëè,0,1618434226,0,0.000010,0.977724,0.004620,0.217348,0.000010,0.000834,81.496619,False
1,2551856921623855106_6323132732,2,2,234,1618478607,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,üòç üòç üòç üòç,0,1618463343,0,0.000010,0.890304,0.005921,0.000010,0.002991,0.001112,99.006701,False
2,2550773232809730389_6323132732,1,1,205,1618295887,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...,üôå ü§≤ ü§≤ ü§≤,0,1618300081,0,0.000010,0.000010,0.003085,1.000010,0.000010,0.000997,0.000997,False
3,2550750258283139041_6323132732,8,1,205,1618293148,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...,"–±—Ä–∞—Ç, –±—É–¥—å –Ω–∞ –≤–∏–¥—É —Å –≤—ã—Å–æ—Ç—ã !!! –±—É–¥—å —Å—á–∞—Å—Ç–ª–∏–≤!!!",0,1618314233,0,0.004342,0.484390,0.040856,0.033096,0.414909,0.444102,49.549301,False
4,2550428231701540075_6323132732,8,4,332,1618254760,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...,üëè,0,1618254886,0,0.000010,0.000010,0.000010,1.000010,0.000010,0.001000,0.001000,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7621,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,@ rakhmet92 —Ç—ã @qazaqqylyq @ aiganym12 –≤ –±–µ–ª–æ–º,0,1564052043,0,0.014514,0.048868,0.016925,0.988323,0.009423,1.346279,4.532973,False
7622,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,@ rayoul28 –æ–Ω –Ω–µ,2,1564052248,0,0.001559,0.002725,0.014074,0.998765,0.000010,0.153251,0.267909,False
7623,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,"—É @ rayoul28 –±—ã–ª–æ –º–Ω–æ–≥–æ –≤—ã—Å—Ç–∞–≤–æ–∫, —Ç—ã –∑–∞–±—ã–ª –ø—Ä–æ...",2,1564052313,0,0.007826,0.010024,0.050341,0.983607,0.001711,0.742841,0.951447,False
7624,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,@ rakhmet92 —Ç–∞–∫ —á—Ç–æ –∫–æ—Ä–º–µ–≥–µ–ª–∏ –∫–æ–ø –∂–∏—Ä–Ω—ã–π –∞–π—Ç–∏—à?,2,1564052718,0,0.026769,0.003283,0.075868,0.979046,0.001144,2.464694,0.302277,True


In [17]:
toreint = lambda x: int(re.sub('[^0-9]', '', str(x)))

In [18]:
results.media_type = results.media_type.apply(toreint)
results.comment_count = results.comment_count.apply(toreint)
results.media_like_count = results.media_like_count.apply(toreint)
results.media_created_at = results.media_created_at.apply(toreint)
results.comment_type = results.comment_type.apply(toreint)
results.comment_created_at = results.comment_created_at.apply(toreint)
results.comment_like_count = results.comment_like_count.apply(toreint)
results = results.fillna(0)
results

Unnamed: 0,media_id,media_type,comment_count,media_like_count,media_created_at,post_text,comment_text,comment_type,comment_created_at,comment_like_count,negative,positive,skip,neutral,speech,negative_prob,positive_prob,negative_boolean
0,2551856921623855106_6323132732,2,2,234,1618478607,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,üëè üëè üëè üëè,0,1618434226,0,0.000010,0.977724,0.004620,0.217348,0.000010,0.000834,81.496619,False
1,2551856921623855106_6323132732,2,2,234,1618478607,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,üòç üòç üòç üòç,0,1618463343,0,0.000010,0.890304,0.005921,0.000010,0.002991,0.001112,99.006701,False
2,2550773232809730389_6323132732,1,1,205,1618295887,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...,üôå ü§≤ ü§≤ ü§≤,0,1618300081,0,0.000010,0.000010,0.003085,1.000010,0.000010,0.000997,0.000997,False
3,2550750258283139041_6323132732,8,1,205,1618293148,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...,"–±—Ä–∞—Ç, –±—É–¥—å –Ω–∞ –≤–∏–¥—É —Å –≤—ã—Å–æ—Ç—ã !!! –±—É–¥—å —Å—á–∞—Å—Ç–ª–∏–≤!!!",0,1618314233,0,0.004342,0.484390,0.040856,0.033096,0.414909,0.444102,49.549301,False
4,2550428231701540075_6323132732,8,4,332,1618254760,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...,üëè,0,1618254886,0,0.000010,0.000010,0.000010,1.000010,0.000010,0.001000,0.001000,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7621,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,@ rakhmet92 —Ç—ã @qazaqqylyq @ aiganym12 –≤ –±–µ–ª–æ–º,0,1564052043,0,0.014514,0.048868,0.016925,0.988323,0.009423,1.346279,4.532973,False
7622,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,@ rayoul28 –æ–Ω –Ω–µ,2,1564052248,0,0.001559,0.002725,0.014074,0.998765,0.000010,0.153251,0.267909,False
7623,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,"—É @ rayoul28 –±—ã–ª–æ –º–Ω–æ–≥–æ –≤—ã—Å—Ç–∞–≤–æ–∫, —Ç—ã –∑–∞–±—ã–ª –ø—Ä–æ...",2,1564052313,0,0.007826,0.010024,0.050341,0.983607,0.001711,0.742841,0.951447,False
7624,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,@ rakhmet92 —Ç–∞–∫ —á—Ç–æ –∫–æ—Ä–º–µ–≥–µ–ª–∏ –∫–æ–ø –∂–∏—Ä–Ω—ã–π –∞–π—Ç–∏—à?,2,1564052718,0,0.026769,0.003283,0.075868,0.979046,0.001144,2.464694,0.302277,True


In [19]:
results

Unnamed: 0,media_id,media_type,comment_count,media_like_count,media_created_at,post_text,comment_text,comment_type,comment_created_at,comment_like_count,negative,positive,skip,neutral,speech,negative_prob,positive_prob,negative_boolean
0,2551856921623855106_6323132732,2,2,234,1618478607,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,üëè üëè üëè üëè,0,1618434226,0,0.000010,0.977724,0.004620,0.217348,0.000010,0.000834,81.496619,False
1,2551856921623855106_6323132732,2,2,234,1618478607,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,üòç üòç üòç üòç,0,1618463343,0,0.000010,0.890304,0.005921,0.000010,0.002991,0.001112,99.006701,False
2,2550773232809730389_6323132732,1,1,205,1618295887,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...,üôå ü§≤ ü§≤ ü§≤,0,1618300081,0,0.000010,0.000010,0.003085,1.000010,0.000010,0.000997,0.000997,False
3,2550750258283139041_6323132732,8,1,205,1618293148,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...,"–±—Ä–∞—Ç, –±—É–¥—å –Ω–∞ –≤–∏–¥—É —Å –≤—ã—Å–æ—Ç—ã !!! –±—É–¥—å —Å—á–∞—Å—Ç–ª–∏–≤!!!",0,1618314233,0,0.004342,0.484390,0.040856,0.033096,0.414909,0.444102,49.549301,False
4,2550428231701540075_6323132732,8,4,332,1618254760,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...,üëè,0,1618254886,0,0.000010,0.000010,0.000010,1.000010,0.000010,0.001000,0.001000,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7621,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,@ rakhmet92 —Ç—ã @qazaqqylyq @ aiganym12 –≤ –±–µ–ª–æ–º,0,1564052043,0,0.014514,0.048868,0.016925,0.988323,0.009423,1.346279,4.532973,False
7622,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,@ rayoul28 –æ–Ω –Ω–µ,2,1564052248,0,0.001559,0.002725,0.014074,0.998765,0.000010,0.153251,0.267909,False
7623,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,"—É @ rayoul28 –±—ã–ª–æ –º–Ω–æ–≥–æ –≤—ã—Å—Ç–∞–≤–æ–∫, —Ç—ã –∑–∞–±—ã–ª –ø—Ä–æ...",2,1564052313,0,0.007826,0.010024,0.050341,0.983607,0.001711,0.742841,0.951447,False
7624,2081356494843913014_6323132732,2,63,1335,1562337100,–º–∞–Ω–≥–∏—Å—Ç–∞—É –Ω–µ —Å–ª—É—á–∞–π–Ω–æ –Ω–∞–∑—ã–≤–∞—é—Ç –º—É–∑–µ–µ–º –ø–æ–¥ –æ—Ç–∫—Ä...,@ rakhmet92 —Ç–∞–∫ —á—Ç–æ –∫–æ—Ä–º–µ–≥–µ–ª–∏ –∫–æ–ø –∂–∏—Ä–Ω—ã–π –∞–π—Ç–∏—à?,2,1564052718,0,0.026769,0.003283,0.075868,0.979046,0.001144,2.464694,0.302277,True


In [20]:
results.sort_values('negative_prob')[-100:]

Unnamed: 0,media_id,media_type,comment_count,media_like_count,media_created_at,post_text,comment_text,comment_type,comment_created_at,comment_like_count,negative,positive,skip,neutral,speech,negative_prob,positive_prob,negative_boolean
4531,2373853714173495634_6323132732,1,64,199,1597205488,‚ùó–≤–Ω–∏–º–∞–Ω–∏–µ! –æ–±—Ä–∞–∑–æ–≤–∞—Ç–µ–ª—å–Ω—ã–µ –≥—Ä–∞–Ω—Ç—ã –∞–∫–∏–º–∞—Ç–∞ –º–∞–Ω–≥...,@gulfiyaatshibaeva –Ω–µ —Å–æ–∑–¥–∞–µ—Ç —Å–∞–π—Ç –∏–∑ –∑–∞ —Ç–µ—Ö–Ω–∏...,2,1597396512,0,0.422515,0.047436,0.075868,0.538993,0.008072,38.660524,4.340431,True
4396,2387109747575569389_6323132732,1,25,369,1598785679,–∞–±–∏—Ç—É—Ä–∏–µ–Ω—Ç—ã –∫–∞–∑–∞—Ö—Å—Ç–∞–Ω—Å–∫–æ–π –≥—Ä—É–ø–ø—ã –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É...,–ø–æ—á–µ–º—É –±—ã —Ç–µ–±–µ –Ω–µ –æ—Ç–∫—Ä—ã—Ç—å –≥—Ä—É–ø–ø—É?,0,1598892491,3,0.562187,0.000010,0.048868,0.839744,0.000010,38.749617,0.000689,True
1558,2460458648446548922_6323132732,8,86,416,1607529550,¬´11 –¥–µ–∫–∞–±—Ä—è –≤ 17:00 —Ä–µ–∫—Ç–æ—Ä –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å...,@ aaaaii94 –±—ã–ª –Ω–∞ —Å–µ—Å—Å–∏–∏ –≤ –ø—Ä–æ—à–ª–æ–º –≥–æ–¥—É! –≤—ã —Å–∫...,2,1607613809,0,0.362979,0.087574,0.063725,0.407343,0.013233,38.827357,9.367646,True
5572,2282135138639625971_6323132732,1,15,476,1586271731,–∫–∞–∫ —Ç–≤–æ–∏ –∫–∞—Ä–∞–Ω—Ç–∏–Ω–Ω—ã–µ –¥–Ω–∏? –≤—ã —Ä–∞–∑–≤–∏–≤–∞–µ—Ç–µ —Å–µ–±—è? ...,"–±–æ–∂–µ, –Ω–µ—Ç",0,1586279971,0,0.132974,0.022296,0.173298,0.012831,0.000010,38.948567,6.530622,True
4818,2341568656494785092_6323132732,2,33,687,1593356908,–¥–∞–≤–∞–π—Ç–µ –ø—Ä–∏–¥–∞–¥–∏–º –≤–∞—à–µ–π –ª–µ–Ω—Ç–µ –Ω–µ–º–Ω–æ–≥–æ –≤–æ–ª—à–µ–±—Å—Ç–≤...,–≤—Å–µ –±—É–¥–µ—Ç –Ω–æ –Ω–µ —Å—Ä–∞–∑—É,0,1593369388,0,0.507822,0.006703,0.008857,0.771854,0.000010,39.206608,0.517497,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4748,2349599070278733752_6323132732,1,16,617,1594316361,–¥–∞–≤–∞–π—Ç–µ –ø–æ–≤—ã—Å–∏–º –º–æ–∑–≥–æ–≤—É—é –∞–∫—Ç–∏–≤–Ω–æ—Å—Ç—å üß† –æ—Ç–≤–µ—Ç—ã –Ω...,–¥–µ–Ω—å–≥–∏ –≥—Ä–µ–±–µ–Ω—å –¥–µ–∫–∞–±—Ä—å,0,1594314806,2,0.924152,0.061886,0.042098,0.061886,0.001075,84.699369,5.671907,True
4755,2349599070278733752_6323132732,1,16,617,1594316361,–¥–∞–≤–∞–π—Ç–µ –ø–æ–≤—ã—Å–∏–º –º–æ–∑–≥–æ–≤—É—é –∞–∫—Ç–∏–≤–Ω–æ—Å—Ç—å üß† –æ—Ç–≤–µ—Ç—ã –Ω...,–¥–µ–Ω—å–≥–∏ –≥—Ä–µ–±–µ–Ω—å –¥–µ–∫–∞–±—Ä—å,0,1594316921,1,0.924152,0.061886,0.042098,0.061886,0.001075,84.699369,5.671907,True
5566,2282135138639625971_6323132732,1,15,476,1586271731,–∫–∞–∫ —Ç–≤–æ–∏ –∫–∞—Ä–∞–Ω—Ç–∏–Ω–Ω—ã–µ –¥–Ω–∏? –≤—ã —Ä–∞–∑–≤–∏–≤–∞–µ—Ç–µ —Å–µ–±—è? ...,"—Ç–∞–º, –≥–¥–µ —è —Å–∫—É—á–∞—é, —è —Å–æ–≤—Å–µ–º –Ω–µ —Å–∫—É—á–∞—é.",0,1586272626,2,0.968866,0.067557,0.092698,0.000677,0.000010,85.754990,5.979487,True
2528,2399539197338820502_6323132732,2,86,290,1600267590,"–∞ —Ç—ã –∫—Ç–æ —Å–µ–≥–æ–¥–Ω—è? üôÇ –∫—Ç–æ —Ç—ã —Å–µ–≥–æ–¥–Ω—è, –¥–∞–≤–∞–π—Ç–µ —Ä–∞...",–±–µ—Å—Å—Ç—Ä–∞—à–Ω—ã–µ –ª—é–¥–∏,0,1600268773,1,0.899131,0.010997,0.033096,0.073706,0.000010,88.415332,1.081375,True


## Top10 words for posts

In [21]:
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("russian")

import stanza
nlp = stanza.Pipeline(lang='ru', processors = "tokenize,lemma", tokenize_batch_size=16)

def stemming(sentences):
    return ' '.join([stemmer.stem(word) for word in sentences.split()])

def lemmatizing(text):
    return ' '.join([j.lemma for i in nlp(text).sentences for j in i.words])

2021-04-22 17:33:26 INFO: Loading these models for language: ru (Russian):
| Processor | Package   |
-------------------------
| tokenize  | syntagrus |
| lemma     | syntagrus |

2021-04-22 17:33:26 INFO: Use device: gpu
2021-04-22 17:33:26 INFO: Loading: tokenize
2021-04-22 17:33:30 INFO: Loading: lemma
2021-04-22 17:33:30 INFO: Done loading processors!


In [None]:
%%time
lemmated = results.drop_duplicates('media_id').post_text.apply(lemmatizing)

In [35]:
from nltk.corpus import stopwords

from sklearn.feature_extraction.text import TfidfVectorizer

tfidf = TfidfVectorizer(stop_words=[lemmatizing(word) for word in stopwords.words('russian')]+[str(i) for i in range(200)])
X_tfidf = tfidf.fit_transform(lemmated).toarray()
vocab = tfidf.vocabulary_
reverse_vocab = {v:k for k,v in vocab.items()}

feature_names = tfidf.get_feature_names()
idx = (-X_tfidf).argsort(axis=1)

tfidf_max10 = idx[:, :20]
tfidf_weight = -np.sort(-X_tfidf, axis=1)[:, :20]

df_tfidf = pd.DataFrame([[reverse_vocab.get(item) for item in row] for row in tfidf_max10])
cl_names = ['top_' + str(i+1) for i in range(20)]
df_tfidf.columns = cl_names
df_tfidf['media_id'] = results.drop_duplicates('media_id')['media_id'].values
df_tfidf = df_tfidf.set_index('media_id')
df_tfidf = df_tfidf[cl_names]
df_tfidf[['weight_' + str(i+1) for i in range(20)]] = -np.sort(-X_tfidf, axis=1)[:, :20]
df_tfidf = df_tfidf[np.array([[df_tfidf.columns[i], df_tfidf.columns[i+20]] for i in range(20)]).reshape(20*2)]
df_tfidf

## Clustering

In [101]:
posts_dataframe = results.drop_duplicates('media_id')[['media_id', 'post_text']]
posts_dataframe = posts_dataframe.set_index('media_id')
posts_dataframe

Unnamed: 0_level_0,post_text
media_id,Unnamed: 1_level_1
2551856921623855106_6323132732,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...
2550773232809730389_6323132732,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...
2550750258283139041_6323132732,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...
2550428231701540075_6323132732,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...
2550227268201948037_6323132732,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤–∞—Å —Å –¥–Ω–µ–º ...
...,...
2086384315387530462_6323132732,–ø—Ä–µ–¥–ª–∞–≥–∞–µ–º –≤–∞—à–µ–º—É –≤–Ω–∏–º–∞–Ω–∏—é –æ—á–µ—Ä–µ–¥–Ω–æ–π –≤—ã–ø—É—Å–∫ –ø—Ä...
2085649982255010735_6323132732,–¥–æ–±—Ä–æ –ø–æ–∂–∞–ª–æ–≤–∞—Ç—å –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç! 12 ...
2084913117751324217_6323132732,# –æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ # –¥–µ—Ç–∏ # —à–∫–æ–ª–∞ # –Ω–∞—É–∫–∞ # –æ–±—Ä–∞–∑–æ–≤...
2084744888537924606_6323132732,—É–≤–∞–∂–∞–µ–º—ã–µ —Å–æ–∏—Å–∫–∞—Ç–µ–ª–∏! —Å–ø–µ—Ü–∏–∞–ª—å–Ω–æ –¥–ª—è –≤–∞—Å! –ø—Ä–∏–µ...


In [102]:
train_df = pd.concat([posts_dataframe, results.groupby('media_id').mean().loc[:,['negative', 'positive',
                                                                                                 'skip', 'speech',]]], axis=1)
train_df

Unnamed: 0,post_text,negative,positive,skip,speech
2551856921623855106_6323132732,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,0.000010,0.934014,0.005270,0.001501
2550773232809730389_6323132732,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...,0.000010,0.000010,0.003085,0.000010
2550750258283139041_6323132732,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...,0.004342,0.484390,0.040856,0.414909
2550428231701540075_6323132732,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...,0.012953,0.302760,0.095953,0.002089
2550227268201948037_6323132732,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤–∞—Å —Å –¥–Ω–µ–º ...,0.000010,0.777310,0.001075,0.000010
...,...,...,...,...,...
2086384315387530462_6323132732,–ø—Ä–µ–¥–ª–∞–≥–∞–µ–º –≤–∞—à–µ–º—É –≤–Ω–∏–º–∞–Ω–∏—é –æ—á–µ—Ä–µ–¥–Ω–æ–π –≤—ã–ø—É—Å–∫ –ø—Ä...,0.022414,0.061812,0.110492,0.093752
2085649982255010735_6323132732,–¥–æ–±—Ä–æ –ø–æ–∂–∞–ª–æ–≤–∞—Ç—å –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç! 12 ...,0.005017,0.478316,0.011814,0.000010
2084913117751324217_6323132732,# –æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ # –¥–µ—Ç–∏ # —à–∫–æ–ª–∞ # –Ω–∞—É–∫–∞ # –æ–±—Ä–∞–∑–æ–≤...,0.000388,0.004041,0.446665,0.000010
2084744888537924606_6323132732,—É–≤–∞–∂–∞–µ–º—ã–µ —Å–æ–∏—Å–∫–∞—Ç–µ–ª–∏! —Å–ø–µ—Ü–∏–∞–ª—å–Ω–æ –¥–ª—è –≤–∞—Å! –ø—Ä–∏–µ...,0.053253,0.009870,0.040726,0.038064


In [103]:
from nltk.corpus import stopwords

from sklearn.feature_extraction.text import TfidfVectorizer

tfidf_clus = TfidfVectorizer(stop_words=[stemming(word) for word in stopwords.words('russian')], max_df=0.1, min_df=0.001, )
X_tfidf_clus = tfidf_clus.fit_transform(lemmated).toarray()
print(X_tfidf_clus.shape)

(411, 6468)


In [104]:
from sklearn.cluster import KMeans
from collections import Counter

cluster_count = 19
model = KMeans(n_clusters=cluster_count, random_state=0).fit(np.hstack([X_tfidf_clus, train_df.loc[:, ['negative', 'positive',
                                                                                                 'skip', 'speech',]].values]))
model_prediction = model.predict(np.hstack([X_tfidf_clus, train_df.loc[:, ['negative', 'positive',
                                                                                                 'skip', 'speech',]].values]))
print(Counter(model_prediction).most_common())

[(3, 54), (15, 45), (2, 35), (0, 34), (7, 31), (16, 27), (13, 26), (9, 20), (6, 20), (5, 18), (8, 16), (18, 15), (4, 15), (14, 13), (11, 12), (12, 10), (17, 9), (10, 6), (1, 5)]


In [105]:
train_df['cluster_id'] = model_prediction
train_df.to_excel('excel/insta_cluster_posts.xlsx')

In [106]:
cluster_text = {}
for i in range(cluster_count):
    cluster_text[i] = ' '.join(train_df[train_df.cluster_id == i].post_text.values)

In [107]:
cluster_text.keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18])

In [108]:
tfidf_clus_top = TfidfVectorizer(stop_words=[stemming(word) for word in stopwords.words('russian')]+[str(i) for i in range(200)])
tfidf_clus_top_arr = tfidf_clus_top.fit_transform([lemmatizing(clusses) for clusses in list(cluster_text.values())]).toarray()
vocab = tfidf_clus_top.vocabulary_
reverse_vocab = {v:k for k,v in vocab.items()}

feature_names = tfidf_clus_top.get_feature_names()

In [114]:
idx = (-tfidf_clus_top_arr).argsort(axis=1)

tfidf_max10 = idx[:, :100]
tfidf_weight = -np.sort(-tfidf_clus_top_arr, axis=1)[:, :100]

df_tfidf_c = pd.DataFrame([[reverse_vocab.get(item) for item in row] for row in tfidf_max10]).T
df_tfidf_c.index = ['top_' + str(i+1) for i in range(100)]
df_tfidf_c.columns = ['cluster_' + str(i+1) for i in range(cluster_count)]
df_tfidf_c = df_tfidf_c.T
df_tfidf_c[['weight_' + str(i+1) for i in range(100)]] = tfidf_weight
df_tfidf_c = df_tfidf_c[np.array([[df_tfidf_c.columns[i], df_tfidf_c.columns[i+100]] for i in range(100)]).reshape(100*2)]
df_tfidf_c

Unnamed: 0,top_1,weight_1,top_2,weight_2,top_3,weight_3,top_4,weight_4,top_5,weight_5,...,top_96,weight_96,top_97,weight_97,top_98,weight_98,top_99,weight_99,top_100,weight_100
cluster_1,—É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç,0.376318,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π,0.265636,–≤–∫–ª–∞–¥,0.194467,—Å—Ç—É–¥–µ–Ω—Ç,0.162903,—Ä–∞–∑–≤–∏—Ç–∏–µ,0.143116,...,—Ñ–æ—Ä–º–∏—Ä–æ–≤–∞–Ω–∏–µ,0.04242,—Ç—É–≥–µ–ª—å,0.041776,–∑–∞–≤–µ—Ä—à–∏–ª—ã–π,0.041776,–Ω–æ–≥–∞—Ç–∞–µ–≤–Ω—ã–π,0.041776,—á–∏—Å—Ç–æ—Ç–∞,0.041776
cluster_2,—Å–∫–∏–¥–∫–∏–π,0.345105,—Å–±–µ—Ä–±–∞–Ω–∫–∏–π,0.207063,–ª–∏—Ü–æ–π,0.190872,—à–∫–æ–ª–∞,0.184296,okko,0.172553,...,–Ω—É—Ä–¥–∞—É–ª–µ—Ç,0.054535,–ø—Ä–∏—è—Ç–Ω—ã–π,0.054535,—Å–µ—Ä–∏–∞–ª,0.054535,—à–∞—Ö–º–∞—Ç–Ω—ã–π,0.054535,–ø—Ä–æ–µ–∫—Ç,0.051125
cluster_3,google,0.277098,meet,0.269806,—É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç,0.265477,—ç–∫–∑–∞–º–µ–Ω,0.218762,—Å—Ç—É–¥–µ–Ω—Ç,0.214462,...,–æ–±—Ä–∞—Ç–Ω—ã–π,0.036916,–ø–µ—Ä–µ–π—Ç–∏,0.03646,–ø–æ—ç—Ç–æ–º—É,0.03646,—Å–≤–æ–π,0.036328,—Ü–µ–Ω—Ç—Ä,0.035985
cluster_4,—É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç,0.241335,—Å–µ—Ä—Ç–∏—Ñ–∏–∫–∞—Ç,0.203734,—Ç–æ–Ω,0.179928,–¥–æ–∫—É–º–µ–Ω—Ç,0.162825,–±—ã—Ç—å,0.160513,...,–ø–æ—á—Ç–∞,0.04342,—Ç–æ–≥–¥–∞,0.042807,–æ—Ç–ø—Ä–∞–≤—å—Ç—ã–π,0.042807,–∞–≤–≥—É—Å—Ç,0.042471,–≥—Ä–∞–∂–¥–∞–Ω–∏–Ω,0.041135
cluster_5,–≤–æ–µ–Ω–Ω—ã–π,0.525057,—É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç,0.205788,–ø—Ä–æ–µ–∫—Ç,0.195773,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π,0.16463,–∫–∞—Ñ–µ–¥—Ä–∞,0.163576,...,—Ç–∞–∫–∂–µ,0.031021,—Ä–∞–º–∫–∞,0.031021,—Ç–µ—Ö–Ω–æ–ª–æ–≥–∏—è,0.030971,–º–æ–º–µ–Ω—Ç,0.030971,—Å–æ—Ç—Ä—É–¥–Ω–∏–∫,0.030971
cluster_6,–º–µ–º–æ—Ä–∞–Ω–¥—É–º,0.49142,—É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç,0.280092,–∞–æ,0.262661,–ø–æ–¥–ø–∏—Å–∞—Ç—å,0.208869,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π,0.192563,...,–∫–∞–¥—Ä,0.03426,–ø–æ–ª–∏—Ü–∏—è,0.03426,–ø–æ–¥—Ä–∞–∑–¥–µ–ª–µ–Ω–∏–µ,0.03426,–∫–∞–∑–æ–π—Å—Ç–∞–Ω–∞,0.03426,–≥–æ—Ç–æ–≤–∏—Ç—å,0.03426
cluster_7,–º–æ—Ä—Å–∫–æ–π,0.306848,—Ö–æ—Ç–µ—Ç—å,0.253456,–∞–∫–∞–¥–µ–º–∏—è,0.229487,—É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç,0.191609,–≥–æ–¥,0.152229,...,–º–æ–º–µ–Ω—Ç,0.040553,–∫–æ—Ä—Ä—É–ø—Ü–∏—è,0.040553,—Ç–µ—Ö–Ω–∏—á–µ—Å–∫–∏–π,0.040553,–¥–∏—Ä–µ–∫—Ç–æ—Ä,0.040553,–∫–æ–º–∏—Å—Å–∏—è,0.040553
cluster_8,men,0.230199,sultany,0.230199,yessenov,0.227751,ar√Ωy,0.194784,–Ω–∞—à,0.168012,...,–∞–¥–∏–ª—å,0.042795,–∑–≤–∞—Ç—å,0.042795,—Ä–∞–º–∞–∑–∞–Ω,0.042795,–Ω–∞–∑–∞–¥,0.042795,—Ö–æ–±–±–∏,0.042795
cluster_9,–ø—Ä–µ–∑–∏–¥–µ–Ω—Ç,0.362085,–≤—ã–±–æ—Ä—ã,0.322865,–ø–ª–æ—â–∞–¥–∫–∞,0.271046,—Å—Ç—É–¥–µ–Ω—Ç,0.211975,–∫–∞–Ω–¥–∏–¥–∞—Ç,0.17563,...,–µ—Å–µ–Ω–≥–∞–ª–∏–µ–≤,0.040358,–Ω–æ–≤—ã–π,0.039339,–Ω–æ—è–±—Ä—å,0.039052,—á–ª–µ–Ω,0.039052,–Ω–∞–¥–µ—è—Ç—å—Å—è,0.037574
cluster_10,—É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç,0.311265,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π,0.2193,—Å–µ–º–∏–Ω–∞—Ä,0.218279,–∞–±–∞–π,0.135049,–±—ã—Ç—å,0.12643,...,–ø–µ—Ä–µ–≤–æ–¥—á–∏–∫,0.046726,–ø–æ–ø—É–ª—è—Ä–∏–∑–∏—Ä–æ–≤–∞—Ç—å,0.046726,—Ä–æ—Å—Å–∏–π,0.046726,–æ–±—Ä–∞–∑–æ–≤–∞–Ω–Ω—ã–π,0.046726,–≥–æ–Ω—á–∞—Ä–Ω—ã–π,0.046726


In [33]:
df_tfidf_c.to_excel('excel/insta_cluster_top100weights.xlsx')

In [172]:
last_otchet = results.drop_duplicates('media_id')[['media_id', 'media_type', 'comment_count', 'media_like_count',
                                     'media_created_at', 'post_text', 'negative_prob', 'positive_prob']]
last_otchet = last_otchet.set_index('media_id')
last_otchet

Unnamed: 0_level_0,media_type,comment_count,media_like_count,media_created_at,post_text,negative_prob,positive_prob
media_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2551856921623855106_6323132732,2,2,234,1618478607,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,0.000834,81.496619
2550773232809730389_6323132732,1,1,205,1618295887,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...,0.000997,0.000997
2550750258283139041_6323132732,8,1,205,1618293148,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...,0.444102,49.549301
2550428231701540075_6323132732,8,4,332,1618254760,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...,0.001000,0.001000
2550227268201948037_6323132732,1,1,158,1618230803,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤–∞—Å —Å –¥–Ω–µ–º ...,0.000674,52.372318
...,...,...,...,...,...,...,...
2086384315387530462_6323132732,2,17,345,1562943844,–ø—Ä–µ–¥–ª–∞–≥–∞–µ–º –≤–∞—à–µ–º—É –≤–Ω–∏–º–∞–Ω–∏—é –æ—á–µ—Ä–µ–¥–Ω–æ–π –≤—ã–ø—É—Å–∫ –ø—Ä...,14.481772,0.738945
2085649982255010735_6323132732,1,2,343,1562848875,–¥–æ–±—Ä–æ –ø–æ–∂–∞–ª–æ–≤–∞—Ç—å –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç! 12 ...,0.968042,1.445470
2084913117751324217_6323132732,2,2,553,1562841855,# –æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ # –¥–µ—Ç–∏ # —à–∫–æ–ª–∞ # –Ω–∞—É–∫–∞ # –æ–±—Ä–∞–∑–æ–≤...,0.070265,0.741019
2084744888537924606_6323132732,1,17,321,1562740979,—É–≤–∞–∂–∞–µ–º—ã–µ —Å–æ–∏—Å–∫–∞—Ç–µ–ª–∏! —Å–ø–µ—Ü–∏–∞–ª—å–Ω–æ –¥–ª—è –≤–∞—Å! –ø—Ä–∏–µ...,3.323011,0.039193


In [174]:
last_otchet['cluster_label'] = train_df.cluster_id
last_otchet['top10keywords'] = df_tfidf.T.apply(lambda x: ', '.join(x))
last_otchet['cluster_top100keywords'] = last_otchet.cluster_label.apply(lambda x: df_tfidf_c.T.apply(lambda x: ', '.join(x))[x])
last_otchet

Unnamed: 0_level_0,media_type,comment_count,media_like_count,media_created_at,post_text,negative_prob,positive_prob,cluster_label,top10keywords,cluster_top100keywords
media_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2551856921623855106_6323132732,2,2,234,1618478607,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,0.000834,81.496619,4,"–∫–ª–∞—Å—Å, –¥–µ–ª–æ, –º–∞—Å—Ç–µ—Ä, —Å–µ–º—å—è, –∞–∫—Ü–∏—è, –ø–æ–º–æ—á—å, –∫—Ä–æ...","–ø—Ä–µ–¥—Å—Ç–∞–≤–∏—Ç–µ–ª—å, –æ–±—Å—É–¥–∏—Ç—å, –ø—Ä–æ—Ñ–µ—Å—Å–æ—Ä—Å–∫–∏–π, –æ–∫—Ç—è–±—Ä..."
2550773232809730389_6323132732,1,1,205,1618295887,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...,0.000997,0.000997,14,"—Ä–∞—Å—Ç–∏, –µ–¥–∏–Ω—ã–π, –Ω–∞–º–µ—Ä–µ–Ω–∏–µ, –µ–¥–∏–Ω—Å—Ç–≤–æ, –±–ª–∞–≥–æ–ø–æ–ª—É—á...","—Ä–∞—Ö–º–∞—à—ã, —ç–ª—å–∂–∞–Ω, –Ω–∞–¥–µ–∂–Ω—ã–π, —Å–∞–≥–∏–¥—É–ª–ª–∏–Ω, —á–∏–Ω–≥–∏–∑,..."
2550750258283139041_6323132732,8,1,205,1618293148,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...,0.444102,49.549301,7,"–∫–æ—Ä—Ä—É–ø—Ü–∏—è, –æ—Ç–≤–µ—Ç, —Å–µ–º—å—è, –≤—Å—Ç—Ä–µ—á–∞, —Ñ–æ—Ä–º–∏—Ä–æ–≤–∞–Ω–∏–µ...","–ø—Ä–µ–¥–æ—Å—Ç–∞–≤–∏—Ç—å, –∏–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç, –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä–æ–≤–∞—Ç—å, —Å–µ–º..."
2550428231701540075_6323132732,8,4,332,1618254760,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...,0.001000,0.001000,13,"–∫–º, –º—É—Å–æ—Ä–∏—Ç—å, –≤—ã–±—Ä–∞—Å—ã–≤–∞—Ç—å, –∫–∏–ª–æ–º–µ—Ç—Ä, –ø—Ä–∏–±—Ä–µ–∂–Ω—ã...","–º–æ–π, —é—Ä–∏–¥–∏—á–µ—Å–∫–∏–π, –≥–æ—Å—É–¥–∞—Ä—Å—Ç–≤–µ–Ω–Ω—ã–π, –æ—Ç–∫—Ä—ã—Ç–∏–µ, —Ç..."
2550227268201948037_6323132732,1,1,158,1618230803,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤–∞—Å —Å –¥–Ω–µ–º ...,0.000674,52.372318,4,"–∂–µ–ª–∞—Ç—å, —É–≤–µ–ª–∏—á–∏–≤–∞—Ç—å—Å—è, –ø—Ä–∏–≤–ª–µ–∫–∞—Ç—å, —á–µ–ª–æ–≤–µ–∫, –ø–æ...","–ø—Ä–µ–¥—Å—Ç–∞–≤–∏—Ç–µ–ª—å, –æ–±—Å—É–¥–∏—Ç—å, –ø—Ä–æ—Ñ–µ—Å—Å–æ—Ä—Å–∫–∏–π, –æ–∫—Ç—è–±—Ä..."
...,...,...,...,...,...,...,...,...,...,...
2086384315387530462_6323132732,2,17,345,1562943844,–ø—Ä–µ–¥–ª–∞–≥–∞–µ–º –≤–∞—à–µ–º—É –≤–Ω–∏–º–∞–Ω–∏—é –æ—á–µ—Ä–µ–¥–Ω–æ–π –≤—ã–ø—É—Å–∫ –ø—Ä...,14.481772,0.738945,12,"–¥–æ—Å—Ç–∏–∂–µ–Ω–∏–µ, –ø–æ–ª–∏—Ç–∏–∫–∞, —É—Å–ª–æ–≤–∏–µ, –∏–Ω—Ç–µ—Ä–≤—å—é, –Ω–æ–º–µ—Ä...","–ª—É—á—à–∏–π, —Ä–∞–¥, —Å—Ç–∞—Ä—Ç—ã–π, –≥—Ä–∞–Ω—Ç, –Ω–∞–∂–º–∏—Ç, –æ–Ω–∏, –æ–±—Ä–∞..."
2085649982255010735_6323132732,1,2,343,1562848875,–¥–æ–±—Ä–æ –ø–æ–∂–∞–ª–æ–≤–∞—Ç—å –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç! 12 ...,0.968042,1.445470,3,"–∫—É–ª—å—Ç—É—Ä–∞, —Å—Ç—É–¥–µ–Ω—á–µ—Å–∫–∏–π, –∂–¥–∞—Ç—å, –∏—Ö, —Å–µ–º—å—è, –≤–∏–¥,...","–ª–µ–∫—Ü–∏—è, –≤—ã–±—Ä–∞—Ç—å, –∏—Å–∫—É—Å—Å—Ç–≤–µ–Ω–Ω—ã–π, –∏–Ω—Ç–µ—Ä–∞–∫—Ç–∏–≤, –∏–Ω..."
2084913117751324217_6323132732,2,2,553,1562841855,# –æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ # –¥–µ—Ç–∏ # —à–∫–æ–ª–∞ # –Ω–∞—É–∫–∞ # –æ–±—Ä–∞–∑–æ–≤...,0.070265,0.741019,14,"—ç–ª–∞–º–ª–∞—Ç–µ—Ü–∏–Ω, —à–∫–æ–ª–∞, –Ω–∞—Ü–∏–æ–Ω–∞–ª—å–Ω—ã–π, –∑–Ω–∞–Ω–∏–µ, –∫—É–ª—å...","—Ä–∞—Ö–º–∞—à—ã, —ç–ª—å–∂–∞–Ω, –Ω–∞–¥–µ–∂–Ω—ã–π, —Å–∞–≥–∏–¥—É–ª–ª–∏–Ω, —á–∏–Ω–≥–∏–∑,..."
2084744888537924606_6323132732,1,17,321,1562740979,—É–≤–∞–∂–∞–µ–º—ã–µ —Å–æ–∏—Å–∫–∞—Ç–µ–ª–∏! —Å–ø–µ—Ü–∏–∞–ª—å–Ω–æ –¥–ª—è –≤–∞—Å! –ø—Ä–∏–µ...,3.323011,0.039193,9,"–ø—Ä—è–º–æ–π, —ç—Ñ–∏—Ä, —Å–æ–∏—Å–∫–∞—Ç–µ–ª—å, —Ç—Ä–∞–Ω—Å–ª–∏—Ä–æ–≤–∞—Ç—å, –≤–∞—à, ...","231, –≤–Ω—É—Ç—Ä–∏, —Ç—Ç–µ–ª–µ—Ñ–æ–Ω, –¥–æ—Å–∞–Ω, –æ—Å—Ç–∞–ª—å–Ω–æ–π, –ø–µ—Ä–µ–Ω..."


In [27]:
from datetime import datetime

last_otchet = pd.read_excel('excel/instagram_media.xlsx')
last_otchet.media_created_at = last_otchet.media_created_at.apply(lambda x: datetime.utcfromtimestamp(x).strftime('%d.%m.%Y %H:%M:%S'))
last_otchet

Unnamed: 0,media_id,media_type,comment_count,media_like_count,media_created_at,post_text,negative_prob,positive_prob,cluster_label,top10keywords,cluster_top100keywords
0,2551856921623855106_6323132732,2,2,234,15.04.2021 09:23:27,17 –º–∞—Ä—Ç–∞ —Å—Ä–µ–¥–∏ —Å—Ç—É–¥–µ–Ω—Ç–æ–≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–≥–æ —É–Ω–∏–≤–µ—Ä—Å–∏—Ç...,0.000834,81.496619,4,"–∫–ª–∞—Å—Å, –¥–µ–ª–æ, –º–∞—Å—Ç–µ—Ä, —Å–µ–º—å—è, –∞–∫—Ü–∏—è, –ø–æ–º–æ—á—å, –∫—Ä–æ...","–ø—Ä–µ–¥—Å—Ç–∞–≤–∏—Ç–µ–ª—å, –æ–±—Å—É–¥–∏—Ç—å, –ø—Ä–æ—Ñ–µ—Å—Å–æ—Ä—Å–∫–∏–π, –æ–∫—Ç—è–±—Ä..."
1,2550773232809730389_6323132732,1,1,205,13.04.2021 06:38:07,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤—Å–µ—Ö –≤–∞—Å —Å ...,0.000997,0.000997,14,"—Ä–∞—Å—Ç–∏, –µ–¥–∏–Ω—ã–π, –Ω–∞–º–µ—Ä–µ–Ω–∏–µ, –µ–¥–∏–Ω—Å—Ç–≤–æ, –±–ª–∞–≥–æ–ø–æ–ª—É—á...","—Ä–∞—Ö–º–∞—à—ã, —ç–ª—å–∂–∞–Ω, –Ω–∞–¥–µ–∂–Ω—ã–π, —Å–∞–≥–∏–¥—É–ª–ª–∏–Ω, —á–∏–Ω–≥–∏–∑,..."
2,2550750258283139041_6323132732,8,1,205,13.04.2021 05:52:28,üìå 12 –∞–ø—Ä–µ–ª—è –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–æ–º —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç–µ –ø—Ä–æ—à–ª–∞ –≤...,0.444102,49.549301,7,"–∫–æ—Ä—Ä—É–ø—Ü–∏—è, –æ—Ç–≤–µ—Ç, —Å–µ–º—å—è, –≤—Å—Ç—Ä–µ—á–∞, —Ñ–æ—Ä–º–∏—Ä–æ–≤–∞–Ω–∏–µ...","–ø—Ä–µ–¥–æ—Å—Ç–∞–≤–∏—Ç—å, –∏–Ω—Å—Ç—Ä—É–º–µ–Ω—Ç, –¥–µ–º–æ–Ω—Å—Ç—Ä–∏—Ä–æ–≤–∞—Ç—å, —Å–µ–º..."
3,2550428231701540075_6323132732,8,4,332,12.04.2021 19:12:40,¬´2 –∏ 9 –∞–ø—Ä–µ–ª—è —Å—Ç—É–¥–µ–Ω—Ç—ã –≤–æ–ª–æ–Ω—Ç–µ—Ä—Å–∫–æ–≥–æ –¥–≤–∏–∂–µ–Ω–∏—è ...,0.001000,0.001000,13,"–∫–º, –º—É—Å–æ—Ä–∏—Ç—å, –≤—ã–±—Ä–∞—Å—ã–≤–∞—Ç—å, –∫–∏–ª–æ–º–µ—Ç—Ä, –ø—Ä–∏–±—Ä–µ–∂–Ω—ã...","–º–æ–π, —é—Ä–∏–¥–∏—á–µ—Å–∫–∏–π, –≥–æ—Å—É–¥–∞—Ä—Å—Ç–≤–µ–Ω–Ω—ã–π, –æ—Ç–∫—Ä—ã—Ç–∏–µ, —Ç..."
4,2550227268201948037_6323132732,1,1,158,12.04.2021 12:33:23,–µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç –ø–æ–∑–¥—Ä–∞–≤–ª—è–µ—Ç –≤–∞—Å —Å –¥–Ω–µ–º ...,0.000674,52.372318,4,"–∂–µ–ª–∞—Ç—å, —É–≤–µ–ª–∏—á–∏–≤–∞—Ç—å—Å—è, –ø—Ä–∏–≤–ª–µ–∫–∞—Ç—å, —á–µ–ª–æ–≤–µ–∫, –ø–æ...","–ø—Ä–µ–¥—Å—Ç–∞–≤–∏—Ç–µ–ª—å, –æ–±—Å—É–¥–∏—Ç—å, –ø—Ä–æ—Ñ–µ—Å—Å–æ—Ä—Å–∫–∏–π, –æ–∫—Ç—è–±—Ä..."
...,...,...,...,...,...,...,...,...,...,...,...
406,2086384315387530462_6323132732,2,17,345,12.07.2019 15:04:04,–ø—Ä–µ–¥–ª–∞–≥–∞–µ–º –≤–∞—à–µ–º—É –≤–Ω–∏–º–∞–Ω–∏—é –æ—á–µ—Ä–µ–¥–Ω–æ–π –≤—ã–ø—É—Å–∫ –ø—Ä...,14.481772,0.738945,12,"–¥–æ—Å—Ç–∏–∂–µ–Ω–∏–µ, –ø–æ–ª–∏—Ç–∏–∫–∞, —É—Å–ª–æ–≤–∏–µ, –∏–Ω—Ç–µ—Ä–≤—å—é, –Ω–æ–º–µ—Ä...","–ª—É—á—à–∏–π, —Ä–∞–¥, —Å—Ç–∞—Ä—Ç—ã–π, –≥—Ä–∞–Ω—Ç, –Ω–∞–∂–º–∏—Ç, –æ–Ω–∏, –æ–±—Ä–∞..."
407,2085649982255010735_6323132732,1,2,343,11.07.2019 12:41:15,–¥–æ–±—Ä–æ –ø–æ–∂–∞–ª–æ–≤–∞—Ç—å –≤ –µ—Å–µ–Ω–æ–≤—Å–∫–∏–π —É–Ω–∏–≤–µ—Ä—Å–∏—Ç–µ—Ç! 12 ...,0.968042,1.445470,3,"–∫—É–ª—å—Ç—É—Ä–∞, —Å—Ç—É–¥–µ–Ω—á–µ—Å–∫–∏–π, –∂–¥–∞—Ç—å, –∏—Ö, —Å–µ–º—å—è, –≤–∏–¥,...","–ª–µ–∫—Ü–∏—è, –≤—ã–±—Ä–∞—Ç—å, –∏—Å–∫—É—Å—Å—Ç–≤–µ–Ω–Ω—ã–π, –∏–Ω—Ç–µ—Ä–∞–∫—Ç–∏–≤, –∏–Ω..."
408,2084913117751324217_6323132732,2,2,553,11.07.2019 10:44:15,# –æ–±—Ä–∞–∑–æ–≤–∞–Ω–∏–µ # –¥–µ—Ç–∏ # —à–∫–æ–ª–∞ # –Ω–∞—É–∫–∞ # –æ–±—Ä–∞–∑–æ–≤...,0.070265,0.741019,14,"—ç–ª–∞–º–ª–∞—Ç–µ—Ü–∏–Ω, —à–∫–æ–ª–∞, –Ω–∞—Ü–∏–æ–Ω–∞–ª—å–Ω—ã–π, –∑–Ω–∞–Ω–∏–µ, –∫—É–ª—å...","—Ä–∞—Ö–º–∞—à—ã, —ç–ª—å–∂–∞–Ω, –Ω–∞–¥–µ–∂–Ω—ã–π, —Å–∞–≥–∏–¥—É–ª–ª–∏–Ω, —á–∏–Ω–≥–∏–∑,..."
409,2084744888537924606_6323132732,1,17,321,10.07.2019 06:42:59,—É–≤–∞–∂–∞–µ–º—ã–µ —Å–æ–∏—Å–∫–∞—Ç–µ–ª–∏! —Å–ø–µ—Ü–∏–∞–ª—å–Ω–æ –¥–ª—è –≤–∞—Å! –ø—Ä–∏–µ...,3.323011,0.039193,9,"–ø—Ä—è–º–æ–π, —ç—Ñ–∏—Ä, —Å–æ–∏—Å–∫–∞—Ç–µ–ª—å, —Ç—Ä–∞–Ω—Å–ª–∏—Ä–æ–≤–∞—Ç—å, –≤–∞—à, ...","231, –≤–Ω—É—Ç—Ä–∏, —Ç—Ç–µ–ª–µ—Ñ–æ–Ω, –¥–æ—Å–∞–Ω, –æ—Å—Ç–∞–ª—å–Ω–æ–π, –ø–µ—Ä–µ–Ω..."


In [29]:
last_otchet.to_excel('excel/instagram_media.xlsx')