# Bot 

In [None]:

import logging
from telegram import Update, Bot
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters, CallbackContext

bot = Bot(token=API_TOKEN)
updater = Updater(token=API_TOKEN, use_context=True)
dispatcher = updater.dispatcher

logging.basicConfig(format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
                     level=logging.INFO)


def send_new_channel_messages(update: Update, context: CallbackContext):
    channel_id = 'my life'
    channel_updates = bot.get_chat_updates(channel_id)

    user_id = update.message.from_user.id

    for update in channel_updates:
        if update.message:
            bot.forward_message(chat_id=user_id, from_chat_id=channel_id, message_id=update.message.message_id)


def start(update: Update, context: CallbackContext):
    user_id = update.message.from_user.id
    update.message.reply_text(f"Hello! I will send you new messages from the channel.")


def main():
    dispatcher.add_handler(MessageHandler(Filters.chat_type.channel, send_new_channel_messages))
    dispatcher.add_handler(CommandHandler("start", start))

    updater.start_polling()
    updater.idle()


if __name__ == '__main__':
    main()

# Config upload

In [2]:
import configparser
import json

from telethon import TelegramClient
from telethon.errors import SessionPasswordNeededError

In [3]:
# Reading Configs
config = configparser.ConfigParser()
config.read("config.ini")

# Setting configuration values
api_id = config['Telegram']['api_id']
api_hash = config['Telegram']['api_hash']
api_token = config['Telegram']['api_token']

api_hash = str(api_hash)

phone = config['Telegram']['phone']
username = config['Telegram']['username']
channel_link = config['Telegram']['channel_link'] 

db_name = config['database']['db_name']
db_user = config['database']['db_user']
db_password = config['database']['db_password'] 
db_host = config['database']['db_host'] 


In [4]:
from transformers import pipeline


facebook_bart_large_mnli = pipeline("zero-shot-classification", model="/media/tonyalpha/HDD/facebook-bart-large-mnli")
bertweet_base_sentiment_analysis = pipeline("text-classification", model="/media/tonyalpha/HDD/bertweet-base-sentiment-analysis")

In [32]:
# if not client.is_user_authorized():
#     client.send_code_request(phone)
#     try:
#         client.sign_in(phone, input('Enter the code: '))
#     except SessionPasswordNeededError:
#         client.sign_in(password=input('Password: '))

# me = client.get_me()
# print(me)

# Get data from telegram 

In [116]:
from telethon.tl.types import InputMessagesFilterPhotos
from telethon import TelegramClient, events
import nest_asyncio

nest_asyncio.apply()

import os.path

def get_posts(num_posts = 1, download_media = False, only_with_media = True, offset_id = 0, posts_list = []):
    client = TelegramClient(username, api_id, api_hash)
    posts_list = []
    async def main(num_posts, download_media, offset_id, posts_list):
        
        if num_posts < 100:
            message_limit = num_posts
        else:
            message_limit = 100
            
            
        last_post_date = None
        post = {}
        
        await client.start()

        try:
            entity = await client.get_entity(channel_link)

            while True:
                messages = await client.get_messages(entity, limit=message_limit, offset_id = offset_id)

                for message in messages:
                    
                    if only_with_media:
                        if not message.media:
                            continue
                    
                    if last_post_date == None:
                        last_post_date = message.date


                    if last_post_date != message.date: 
                        post['upload_date'] = last_post_date.strftime('%Y-%m-%d %H:%M:%S') if last_post_date else None

                        last_post_date = message.date

                        posts_list.append(post)
                        post = {}


                    if len(message.message) > 0:
                        post['text'] = post.get('text', '') + message.message 
                        
                        # post_id in messages set will be id of message with text and photo 
                        post['post_id'] = message.id


                    if message.media and download_media:

                        photo_id = message.media.photo.id
                        filename = f'image_{photo_id}.jpg'
                        path = f"./media/{filename}"

                        # some of this i should drop, but later 
                        post['photos_id_list'] = post.get('photos_id_list', []) + [photo_id]
                        post['photos_names_list'] = post.get('photos_names_list', []) + [filename]

                        #if file exist check 
                        if not os.path.isfile(path) and download_media:
                            await client.download_media(message, file=path)

                    post['id_list'] = post.get('id_list', []) + [message.id]
                    post['edit_date'] = message.edit_date.strftime('%Y-%m-%d %H:%M:%S') if message.edit_date else None


                offset_id = messages[len(messages) - 1].id

                if len(posts_list) >= num_posts:
                    break


        except Exception as e:
            print(f"Error: {e}")

        await client.disconnect()
        
        return posts_list

    if __name__ == "__main__":
        client.loop.run_until_complete(main(num_posts, download_media, offset_id, posts_list))
        
    return posts_list

In [123]:
get_posts(num_posts = 1)

[{'text': 'asdfsdf',
  'post_id': 700,
  'id_list': [700],
  'edit_date': None,
  'upload_date': '2023-07-24 14:39:42'}]

In [14]:
a[-1]['text']

'05.04.23\n\n👨\u200d👨\u200d👦\u200d👦 🤦🏼 Did not find team for HSE hackaton\n💁🏼\u200d♂️💬 ODS random coffee with Roman\n📚 DL: semianr\n\nDay rate: 5.5/10'

In [308]:
 a[0]['text'].split('\n')

['чпок',
 '',
 'How productive have you been?:6.999/10',
 'How interesting was the day?: 6/10',
 'How stressful was the day?: 2/10']

# Data parsing

In [138]:
import re 

def parse_text(dct):
    
    input_string = dct['text']
    print(input_string)
    
    date_pattern = r'\d{2}\.\d{2}\.\d{2}'
    day_pattern = r'\b(?:MONDAY|TUESDAY|WEDNESDAY|THURSDAY|FRIDAY|SATURDAY|SUNDAY)\b'
    
    date_match = re.search(date_pattern, input_string)
    day_match = re.search(day_pattern, input_string)
    
    date = date_match.group() if date_match else None
    day = day_match.group() if day_match else None
    
    parsed_date = date.replace('.', '-') if date or day else None
    
    dct['parsed_date'] = parsed_date
    
    if date_match:
        input_string = input_string.replace(date_match.group(), "") if parsed_date is not None else input_string
    if day_match:
        input_string = input_string.replace(day_match.group(), "") if parsed_date is not None else input_string
    
    regex_list = [r'How productive have you been\?:\s*(\d+\.?\d*)/10', r'How interesting was the day\?:\s*(\d+\.?\d*)/10', r'How stressful was the day\?:\s*(\d+\.?\d*)/10']
    names_list = ['productivity_score', 'interest_score', 'stress_score']
    
    
    for regex, name in zip(regex_list, names_list):
        match = re.search(regex, input_string)
        parsed_score = float(match.group(1)) if match else None
        
        dct[name] = parsed_score

        input_string = input_string.replace(match.group(), "") if parsed_score is not None else input_string
        
    
    dct['parsed_text'] = input_string.split('\n')
    
    print('Text parsed!')
    return dct
    
def parse_activities(dct):
    
    input_list = dct['parsed_text']
    
    result = []
    for item in input_list:
        match = re.match(r'^([\U0001F000-\U0001F9FF]+|[\U0001FA00-\U0001FA6F]+)?\s*(.*)$', item)
        if match:
            emoji = match.group(1)
            text = match.group(2).strip()
            if emoji or text:
                result.append((emoji, text))
                
    dct['parsed_activities'] = result
    
    print('Activities parsed!')
    return dct

In [97]:
parse_activities(parse_text(a[30]))

24.06.23


{'id_list': [564, 563, 562, 561],
 'edit_date': None,
 'text': '24.06.23',
 'post_id': 561,
 'upload_date': '2023-06-28 15:43:13',
 'parsed_date': '24-06-23',
 'productivity_score': None,
 'interest_score': None,
 'stress_score': None,
 'parsed_text': [''],
 'parsed_activities': []}

# Upload to DB 

In [84]:
import mysql.connector

from getpass import getpass
from mysql.connector import connect, Error


def db_connect():
    try:
        with connect(host=db_host, user=db_user, password=db_password,) as connection:
            use_db_query = f"USE {db_name};"
            create_posts_table_query = """
            CREATE TABLE IF NOT EXISTS posts(
                post_id INT,
                text TEXT,
                parsed_date DATE,
                upload_date DATETIME,
                edit_date DATETIME, 
                productivity_score FLOAT,
                interest_score FLOAT,
                stress_score FLOAT,
                PRIMARY KEY (post_id)
            );
            """
            
            create_posts_table_activities = """
            CREATE TABLE IF NOT EXISTS activities(
                activity_id INT AUTO_INCREMENT,
                post_id INT,
                activity_name TEXT,
                activity_emoji TEXT,
                activity_type TEXT,
                activity_emotion TEXT,
                
                FOREIGN KEY (post_id) REFERENCES posts(post_id),
                PRIMARY KEY (activity_id)
            );
            """
            
            
            with connection.cursor() as cursor:
                cursor.execute(use_db_query)
                cursor.execute(create_posts_table_query)
                cursor.execute(create_posts_table_activities)
                connection.commit()
    except Error as e:
        print(e)
        
def upload_post(dct):
    try:
        with connect(host=db_host, user=db_user, password=db_password,) as connection:
            use_db_query = f"USE {db_name};"
            check_if_exist = """SELECT EXISTS(SELECT 1 FROM posts WHERE post_id = %s LIMIT 1)"""
            
            upload_posts_table_query = f"""
            INSERT INTO posts (post_id, text, parsed_date, upload_date, 
            edit_date,productivity_score, interest_score, stress_score) 
            VALUES(%s, %s, %s, %s, %s, %s, %s, %s)
            """
            
            post_id = (dct.get('post_id', None),)
            
            insert_data = (dct.get('post_id', None), dct.get('text', None), 
                    dct.get('parsed_date', None), dct.get('upload_date', None), 
                    dct.get('edit_date', None),dct.get('productivity_score', None),
                    dct.get('interest_score', None),dct.get('stress_score', None))
            
            with connection.cursor() as cursor:
                cursor.execute(use_db_query)
                cursor.execute(check_if_exist, post_id)
                for el in cursor:
                    exist = el[0]
                
                if not exist:
                    cursor.execute(upload_posts_table_query, insert_data)
                    connection.commit()
                else:
                    print('ERROR: post already uploaded, if you need update it use update_post()')
    except Error as e:
        print(e)
        
        
def upload_activities(dct):
    try:
        with connect(host=db_host, user=db_user, password=db_password,) as connection:
            use_db_query = f"USE {db_name};"
            
            for activity_emoji, activity_name in dct['parsed_activities']:
                
                
                activity_type = predict_activity_type()
                
                activity_emotion = 
                
                

                check_if_exist = """SELECT EXISTS(SELECT 1 FROM activities WHERE post_id = %s AND activity_name = %s LIMIT 1)"""

                upload_posts_table_query = f"""
                INSERT INTO activities (post_id, activity_name, activity_emoji, activity_type, 
                activity_emotion) 
                VALUES(%s, %s, %s, %s, %s)
                """

                post_id = (dct.get('post_id', None), activity_name)

                insert_data = (dct.get('post_id', None), activity_name, 
                              activity_emoji, activity_type, activity_emotion)

                activity_id INT AUTO_INCREMENT,
                    post_id INT,
                    activity_name TEXT,
                    activity_emoji TEXT,
                    activity_type TEXT,
                    activity_emotion TEXT,
                
            
            
            
            with connection.cursor() as cursor:
                cursor.execute(use_db_query)
                cursor.execute(check_if_exist, post_id)
                for el in cursor:
                    exist = el[0]
                
                if not exist:
                    cursor.execute(upload_posts_table_query, insert_data)
                    connection.commit()
                else:
                    print('ERROR: post already uploaded, if you need update it use update_post()')
    except Error as e:
        print(e)
        
        
def update_post(dct):
    try:
        with connect(host=db_host, user=db_user, password=db_password,) as connection:
            use_db_query = f"USE {db_name};"
            update_posts_table_query = """
            UPDATE posts SET text = %s, parsed_date = %s,  
            upload_date = %s, edit_date = %s, productivity_score = %s,
            interest_score = %s, stress_score = %s WHERE post_id = %s
            """
            
            data = (dct.get('text', None), 
                    dct.get('parsed_date', None), dct.get('upload_date', None), 
                    dct.get('edit_date', None),dct.get('productivity_score', None),
                    dct.get('interest_score', None),dct.get('stress_score', None),
                   dct.get('post_id', None))

            with connection.cursor() as cursor:
                cursor.execute(use_db_query)
                cursor.execute(update_posts_table_query, data)
                connection.commit()
    except Error as e:
        print(e)

def db_show_tables():
    try:
        with connect(host=db_host, user=db_user, password=db_password,) as connection:
            use_db_query = f"USE {db_name};"
            show_tables = "SHOW TABLES;"
            with connection.cursor() as cursor:
                cursor.execute(use_db_query)
                cursor.execute(show_tables)
                for el in cursor.fetchall():
                    print(el)
    except Error as e:
        print(e)
        
def db_execute_query(query):
    try:
        with connect(host=db_host, user=db_user, password=db_password,) as connection:
            use_db_query = f"USE {db_name};"
            with connection.cursor() as cursor:
                cursor.execute(use_db_query)
                cursor.execute(query)
                for el in cursor:
                    print(el)
    except Error as e:
        print(e)

In [85]:
db_connect()

In [459]:
db_show_tables()

('posts',)


In [551]:
db_execute_query('SELECT EXISTS(SELECT * FROM posts WHERE post_id = 9 LIMIT 1)')

(0,)


In [80]:
db_execute_query('SELECT * FROM posts;')

# Main

In [152]:
def check_post(post):
    
    date = post.get('upload_date', None)
    
    if post.get('parsed_date', None) is None:
        print(f'I dont see date, can you add it to post uploaded at {date}?')
        return False

    if post.get('productivity_score', None) is None:
        print(f'I dont see productivity_score, can you add it to post uploaded at {date}?')
        return False
        
    if post.get('interest_score', None) is None:
        print(f'I dont see interest_score, can you add it to post uploaded at {date}?')
        return False
        
    if post.get('productivity_score', None) is None:
        print(f'I dont see productivity_score, can you add it to post uploaded at {date}?')
        return False
        
    if post.get('stress_score', None) is None:
        print(f'I dont see stress_score, can you add it to postuploaded at {date}?')
        return False
    print('Post correct!')
    return True

def check_activities(post):
    
    date = post.get('upload_date', None)
    
    if post.get('parsed_activities', None) is None or len(post.get('parsed_activities', None)) < 0:
        print(f'I dont see activities, can you add some to post uploaded at {date}?')
        return False
    return True


In [150]:


posts = get_posts(num_posts = 2)

for post in posts:
    post = parse_text(post)
    post = parse_activities(post)
    if check_post(post):
        upload_post(post)
    
    if check_activities(post):
        
        # get_intents
        # classify_activity
        upload_activities(post)


05.04.23
asdfsdf

How productive have you been?: 0/10
How interesting was the day?: 9/10
How stressful was the day?: 0/10
Text parsed!
Activities parsed!
Post correct!
ERROR: post already uploaded, if you need update it use update_post()
чпок

How productive have you been?:6.999/10
How interesting was the day?: 6/10
How stressful was the day?: 2/10
Text parsed!
Activities parsed!
I dont see date, can you add it to post uploaded at 2023-07-23 16:22:13?


# Activity classification

In [5]:
from transformers import pipeline

In [27]:
sequence_to_classify = "box traning"
candidate_labels = ['self_development', 'university', 'work_&_job', 'relax_&_rest', 'sport', 'family', 'travel_&_adventure', 'home_chore', 'other']
facebook_bart_large_mnli(sequence_to_classify, candidate_labels)

{'sequence': 'box traning',
 'labels': ['other',
  'self_development',
  'relax_&_rest',
  'travel_&_adventure',
  'work_&_job',
  'home_chore',
  'sport',
  'family',
  'university'],
 'scores': [0.3176390826702118,
  0.18102319538593292,
  0.10530360788106918,
  0.10282707959413528,
  0.10149189084768295,
  0.08444009721279144,
  0.059940967708826065,
  0.03241773694753647,
  0.014916374348104]}

In [24]:
def get_activity_type(activity_name, classifier):
    candidate_labels = ['self_development', 'university', 'work_&_job', 'relax_&_rest', 'sport', 'family', 'travel_&_adventure', 'home_chore', 'other']
    return classifier(activity_name, candidate_labels)['labels'][0]


In [26]:
get_activity_type('boxing traning',facebook_bart_large_mnli)

'sport'

# Sentiment analysis 

In [31]:
from transformers import pipeline

bertweet_base_sentiment_analysis = pipeline("text-classification", model="/media/tonyalpha/HDD/bertweet-base-sentiment-analysis")

In [30]:
bertweet_base_sentiment_analysis('Sad day', top_k = 3)

[{'label': 'NEG', 'score': 0.8925472497940063},
 {'label': 'NEU', 'score': 0.0853690579533577},
 {'label': 'POS', 'score': 0.022083677351474762}]

In [5]:
#!pip install datasets

In [6]:
from datasets import load_dataset

dataset = load_dataset("yelp_review_full")
dataset["train"][100]

Downloading builder script:   0%|          | 0.00/4.41k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/2.04k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/6.55k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/196M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/650000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/50000 [00:00<?, ? examples/s]

{'label': 0,
 'text': 'My expectations for McDonalds are t rarely high. But for one to still fail so spectacularly...that takes something special!\\nThe cashier took my friends\'s order, then promptly ignored me. I had to force myself in front of a cashier who opened his register to wait on the person BEHIND me. I waited over five minutes for a gigantic order that included precisely one kid\'s meal. After watching two people who ordered after me be handed their food, I asked where mine was. The manager started yelling at the cashiers for \\"serving off their orders\\" when they didn\'t have their food. But neither cashier was anywhere near those controls, and the manager was the one serving food to customers and clearing the boards.\\nThe manager was rude when giving me my order. She didn\'t make sure that I had everything ON MY RECEIPT, and never even had the decency to apologize that I felt I was getting poor service.\\nI\'ve eaten at various McDonalds restaurants for over 30 years. 

In [7]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")


def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)


tokenized_datasets = dataset.map(tokenize_function, batched=True)

Downloading (…)okenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

Map:   0%|          | 0/650000 [00:00<?, ? examples/s]

Map:   0%|          | 0/50000 [00:00<?, ? examples/s]

In [8]:
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))

In [9]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased", num_labels=5)

Downloading pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

KeyboardInterrupt: 

In [None]:
from transformers import TrainingArguments

training_args = TrainingArguments(output_dir="test_trainer")

In [None]:
import numpy as np
import evaluate

metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [None]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(output_dir="test_trainer", evaluation_strategy="epoch")

In [None]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
)

In [None]:
trainer.train()

# Sync with DB

# Etc 