In [43]:
from bs4 import BeautifulSoup as bs
from bs4.element import Comment
import pandas as pd
import numpy as np
import requests
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import datetime as dt
import string
import time

In [10]:
def value_fix(value, replace = False):
    letters_pattern = str.maketrans('', '', string.ascii_letters)
    value = value.translate(letters_pattern).strip()
    if replace:
        return float(value.replace(',', '.'))
    else:
        return value.replace(',', '')


In [2]:
def months(month):
        Months = {
                'Ocak': '1',	
                'Şubat': '2',
                'Mart': '3',	
                'Nisan': '4',	
                'Mayıs': '5',	
                'Haziran': '6',	
                'Temmuz': '7',	
                'Ağustos': '8',	
                'Eylül': '9',	
                'Ekim': '10',	
                'Kasım': '11',	
                'Aralık': '12'
                }       
        if month in Months:
                return Months[month]
        else:
                return month
def fix_time_it(time):
        str_time = ''
        for i in time.split(' '):
                str_time += months(i) + '-'
        #return str_time[:-1]
        return dt.datetime.strptime(str(str_time[:-1]), '%d-%m-%Y').date()


In [None]:
def time_it(time):
    """
    Returns time data type of given string in form Y-m-d.
    Example: 2001-01-01."""
    return dt.datetime.strptime(str(time), '%Y-%m-%d')

In [1]:
def g_stocks(stock, time_delta = 999):
    scope =  ['https://www.googleapis.com/auth/spreadsheets', "https://www.googleapis.com/auth/drive.file", "https://www.googleapis.com/auth/drive"]
    creds = ServiceAccountCredentials.from_json_keyfile_name('cred.json', scope)
    client = gspread.authorize(creds)
    sheet = client.open('stocks').sheet1
    sheet.update_cell(1, 7, stock)
    sheet.update_cell(1, 8, time_delta)
    if not sheet.get_all_records() == []:
        df = pd.DataFrame(sheet.get_all_records())[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']]
        df.drop(df[df['Close'].str.len() == 0].index, inplace = True)
        type_map = {'Open': float,
                    'Close': float,
                    'High': float,
                    'Low': float,
                    'Volume':int}   
        return df.astype(type_map)
    else:
        return pd.DataFrame(columns=['Date', 'Open', 'High', 'Low', 'Close', 'Volume'])

In [18]:
def page_urls(url):
    page_counter = 1
    pages = [url]
    soup = make_soup(url)
    while soup.find(class_ = 'rightNav'):
        page_counter += 1
        pages.append(url+f'page/{page_counter}/')
        soup = make_soup(pages[-1])
    return pages    


In [3]:

def make_request(url):
    """Request maker.
    Makes request for given url."""
    headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.135 Safari/537.36 Edge/12.246"}
    return requests.get(url=url, headers=headers)

In [4]:
def make_soup(url):
    """Soup maker.
    Returns soup for given url."""
    r = make_request(url)
    if r.status_code == 200:
        return bs(r.content, 'html5lib')
    else:
        return False

In [32]:
def get_halka_arz_info(url, soup = False):
    cols = []
    vals = []
    if not soup:
        soup = make_soup(url)
    info_table = soup.find('table', {'class': 'sp-table'})
    rows = info_table.find_all('tr')
    for row in rows:
        row_data = row.find_all('td')
        cols.append(row_data[0].text)
        vals.append(row_data[1].text)
    return pd.DataFrame(data = [vals], columns = cols)

In [36]:
def get_halka_arz_result(url, soup = False):
    if not soup:
        soup = make_soup(url)
    vals = []
    cols = []
    #cols = ['Yurtiçi_Bireysel_Kisi', 'Yurtiçi_Bireysel_Lot',
    #    'Yurtiçi_Kurumsal_Kisi', 'Yurtiçi_Kurumsal_Lot',
    #    'Yurtdışı_Kurumsal_Kisi', 'Yurtdışı_Kurumsal_Lot',
    #    'Şirket_Çalışanları_Kisi', 'Şirket_Çalışanları_Lot']
    table = soup.find('table', {'class': 'as-table'})
    if not table == None:
        rows = table.find_all('tr')
        for i in range(2, len(rows)-2):
            row_data = rows[i].find_all('td')
            for ex in ['_Kisi', '_Lot']:
                cols.append(row_data[0].text.strip()+ex)
            for j in range(1,3):
                vals.append(row_data[j].text)
        return pd.DataFrame(data = [vals], columns = cols)
    else:
        return pd.DataFrame()

In [20]:
url = 'https://halkarz.com/k/halka-arz/'

In [26]:
start_year = dt.datetime.today().year
year_period = 3
halka_arz_pages = {}
stocks = {}
for i in range(year_period):
    year_page = url+f'{start_year - i}/'
    #halka_arz_pages.update({page : {'Pages':page_urls(page)}})
    pages = page_urls(year_page)
    for page in pages:
        page_soup = make_soup(page)
        stocks_ = page_soup.find_all(class_ = "halka-arz-list archives")
        for stock in stocks_:
            stock_content = stock.find(class_ = 'il-content')
            stock_symbol = stock_content.find(class_ = 'il-bist-kod').get_text(strip=True)
            stock_name = stock_content.find(class_ = 'il-halka-arz-sirket').find('a')['title']
            stock_page = stock_content.find(class_ = 'il-halka-arz-sirket').find('a')['href']
            stocks.update({stock_symbol :
                            {'url':stock_page,
                             'stock_name':stock_name,
                             'release_year':start_year-i}})

In [41]:
for stock in stocks:
    stocks[stock].update({'Halka_arz_info': get_halka_arz_info(stocks[stock]['url'])})
    stocks[stock].update({'Halka_arz_result': get_halka_arz_result(stocks[stock]['url'])})

In [45]:
for stock in stocks:
    print(stock)
    stocks[stock].update({'Hist_data': g_stocks(stock, time_delta=1100)})
    time.sleep(1)

AVPGY
MEGMT
KBORU
SURGY
CATES
SKYMD
BEGYO
AGROT
EKOS
BINHO
MARBL
TABGD
VRGYO
MHRGY
BORLS
DOFER
MEKAG


APIError: {'code': 429, 'message': "Quota exceeded for quota metric 'Read requests' and limit 'Read requests per minute per user' of service 'sheets.googleapis.com' for consumer 'project_number:151682136276'.", 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.ErrorInfo', 'reason': 'RATE_LIMIT_EXCEEDED', 'domain': 'googleapis.com', 'metadata': {'service': 'sheets.googleapis.com', 'quota_location': 'global', 'quota_limit_value': '60', 'consumer': 'projects/151682136276', 'quota_metric': 'sheets.googleapis.com/read_requests', 'quota_limit': 'ReadRequestsPerMinutePerUser'}}, {'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Request a higher quota limit.', 'url': 'https://cloud.google.com/docs/quota#requesting_higher_quota'}]}]}