In [1]:
import pandas as pd

stocks_data_path = "data/stocklist.csv"
stocks_list = pd.read_csv(stocks_data_path)
stocks_list.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Symbol        40 non-null     object
 1   Company Name  40 non-null     object
dtypes: object(2)
memory usage: 768.0+ bytes


In [2]:
stocks_list.head()

Unnamed: 0,Symbol,Company Name
0,ADANIPORTS.NS,Adani Ports & SEZ Ltd.
1,ASIANPAINT.NS,Asian Paints Ltd.
2,AXISBANK.NS,Axis Bank Ltd.
3,BAJAJ-AUTO.NS,Bajaj Auto Ltd.
4,BAJFINANCE.NS,Bajaj Finance Ltd.


In [7]:
import aiohttp
import nest_asyncio

In [17]:
import asyncio
import aiohttp
import pandas as pd
import yfinance as yf
import nest_asyncio
import io

nest_asyncio.apply()

async def download_stock_data(session, symbol, start_date, end_date):
    # Constructing the URL for downloading data
    url = f"https://query1.finance.yahoo.com/v7/finance/download/{symbol}?period1={start_date}&period2={end_date}&interval=1d&events=history"
    async with session.get(url) as response:
        # Read the response as a CSV
        print(f"Called for company {symbol}")
        data = await response.text()
        df = pd.read_csv(io.StringIO(data))
        return df

async def main(stocks_list, start_date, end_date):
    # Convert date to timestamp
    start_ts = int(pd.to_datetime(start_date).timestamp())
    end_ts = int(pd.to_datetime(end_date).timestamp())

    async with aiohttp.ClientSession() as session:
        tasks = []
        for _, row in stocks_list.iterrows():
            symbol = row['Symbol']
            task = asyncio.create_task(download_stock_data(session, symbol, start_ts, end_ts))
            tasks.append(task)

        nifty_data_list = await asyncio.gather(*tasks)

        # Adding company name and code to the dataframes
        for df, (_, row) in zip(nifty_data_list, stocks_list.iterrows()):
            df['company'] = row['Company Name']
            df['company_code'] = row['Symbol'].split('.')[0]

        return nifty_data_list

# Example usage
stocks_list = pd.DataFrame({
    'Symbol': ['ADANIPORTS.NS', 'BAJAJ-AUTO.NS'],
    'Company Name': ['Adani Ports', 'Bajaj Auto Ltd.']
})

start_date = '2023-01-01'
end_date = '2023-11-30'

# Run the asynchronous main function
# nifty_data_list = asyncio.run(main(stocks_list, start_date, end_date))

loop = asyncio.get_event_loop()

# Run the main function within the existing event loop
nifty_data_list = loop.run_until_complete(main(stocks_list, start_date, end_date))

final_nifty_data_df = pd.concat(nifty_data_list, axis=0)

Called for company ADANIPORTS.NS
Called for company BAJAJ-AUTO.NS


In [18]:
final_nifty_data_df.company_code.unique()

array(['ADANIPORTS', 'BAJAJ-AUTO'], dtype=object)

In [20]:
final_nifty_data_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 434 entries, 0 to 216
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Date          434 non-null    object 
 1   Open          434 non-null    float64
 2   High          434 non-null    float64
 3   Low           434 non-null    float64
 4   Close         434 non-null    float64
 5   Adj Close     434 non-null    float64
 6   Volume        434 non-null    int64  
 7   company       434 non-null    object 
 8   company_code  434 non-null    object 
dtypes: float64(5), int64(1), object(3)
memory usage: 33.9+ KB


In [22]:
hello = final_nifty_data_df.to_csv(index=False)

In [24]:
'helllo.NS'.split('.NS')[0]

'helllo'

In [10]:
import yfinance as yf

start_date = '2023-01-01'
end_date = '2023-11-30'

nifty_data_list = []

for _, r in stocks_list.iterrows():
    print(f"Company = {r['Company Name']}")
    df = yf.download(r.Symbol, start=start_date, end=end_date)
    df['company'] = r['Company Name']
    df['company_code'] = r['Symbol'].split('.')[0]
    nifty_data_list.append(df)

Company = Adani Ports & SEZ Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Asian Paints Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Axis Bank Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Bajaj Auto Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Bajaj Finance Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Bajaj Finserv Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Bharat Petroleum Corporation Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Bharti Airtel Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Infratel Ltd.
[*********************100%%**********************]  1 of 1 completed


1 Failed download:
['INFRATEL.NS']: Exception('%ticker%: No timezone found, symbol may be delisted')



Company = Cipla Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Coal India Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Dr. Reddy's Laboratories Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Eicher Motors Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = GAIL (India) Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Grasim Industries Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = HCL Technologies Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = HDFC Bank Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Hero MotoCorp Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = Hindalco Industries Ltd.
[*********************100%%**********************]  1 of 1 completed
Company = H

In [11]:
nifty_data_df = pd.concat(nifty_data_list, axis=0)
nifty_data_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8247 entries, 2023-01-02 to 2023-11-17
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Open          8247 non-null   float64
 1   High          8247 non-null   float64
 2   Low           8247 non-null   float64
 3   Close         8247 non-null   float64
 4   Adj Close     8247 non-null   float64
 5   Volume        8247 non-null   float64
 6   company       8247 non-null   object 
 7   company_code  8247 non-null   object 
dtypes: float64(6), object(2)
memory usage: 579.9+ KB


  nifty_data_df = pd.concat(nifty_data_list, axis=0)


In [12]:
nifty_data_df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,company,company_code
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-01-02,823.0,826.75,816.299988,822.299988,816.808838,2042294.0,Adani Ports & SEZ Ltd.,ADANIPORTS
2023-01-03,822.25,826.400024,817.799988,820.450012,814.971191,2166531.0,Adani Ports & SEZ Ltd.,ADANIPORTS
2023-01-04,820.799988,822.0,806.5,810.0,804.591003,3260112.0,Adani Ports & SEZ Ltd.,ADANIPORTS
2023-01-05,814.049988,821.599976,797.0,819.599976,814.126831,3119740.0,Adani Ports & SEZ Ltd.,ADANIPORTS
2023-01-06,819.900024,824.400024,803.5,806.099976,800.71698,2892006.0,Adani Ports & SEZ Ltd.,ADANIPORTS


In [13]:
nifty_data_df.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,company,company_code
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-11-10,3338.0,3347.449951,3322.350098,3333.449951,3333.449951,1338557.0,Tata Consultancy Services Ltd.,TCS
2023-11-13,3356.0,3356.0,3327.0,3331.550049,3331.550049,1030990.0,Tata Consultancy Services Ltd.,TCS
2023-11-15,3383.100098,3413.0,3356.050049,3404.300049,3404.300049,2059802.0,Tata Consultancy Services Ltd.,TCS
2023-11-16,3430.050049,3529.350098,3414.199951,3497.850098,3497.850098,3951984.0,Tata Consultancy Services Ltd.,TCS
2023-11-17,3497.850098,3524.5,3492.550049,3502.449951,3502.449951,2020363.0,Tata Consultancy Services Ltd.,TCS


In [14]:
nifty_data_df.to_csv('data/2023_nifty_stocks.csv', index=False)

In [15]:
import requests
from bs4 import BeautifulSoup



In [19]:
def fetch_financial_news(company):
    # Construct the URL for the company's financial news
    url = f"https://www.google.com/finance/quote/{company}:NSE"

    # Send an HTTP GET request to the URL
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content of the response
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract the financial news headlines
        financial_news_headlines = soup.find_all('div', class_='b3g-news-headline')

        # Print the financial news headlines
        for headline in financial_news_headlines:
            print(headline.text)
        return soup
    else:
        print(f"Error fetching financial news for {company}: {response.status_code}")
        return None

In [76]:
check = fetch_financial_news('CIPLA')

In [77]:
m = [div.find('a').get('href') for div in check.find_all('div', class_='z4rs2b')]

In [78]:
m

['https://www.livemint.com/market/live-blog/cipla-share-price-live-blog-for-17-nov-2023-11700201410087.html',
 'https://www.livemint.com/market/stock-market-news/stocks-to-watch-tcs-bajaj-finance-dabur-india-ongc-cipla-vedanta-11700093427505.html',
 'https://www.business-standard.com/companies/news/cipla-concludes-51-18-stake-sale-in-ugandan-subsidiary-for-25-million-123111500941_1.html',
 'https://www.devdiscourse.com/article/business/2713601-cipla-completes-stake-sale-in-uganda-based-unit']

In [79]:
# check

In [80]:
def fetch_financial_news_by_url(url):
    # Construct the URL for the company's financial news
    # url = f"https://www.google.com/finance/quote/{company}:NSE"

    # Send an HTTP GET request to the URL
    response = requests.get(url)

    # Check if the request was successful
    if response.status_code == 200:
        # Parse the HTML content of the response
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extract the financial news headlines
        financial_news_headlines = soup.find_all('div', class_='b3g-news-headline')

        # Print the financial news headlines
        for headline in financial_news_headlines:
            print(headline.text)
        return soup
    else:
        print(f"Error fetching financial news for {url}: {response.status_code}")
        return None

In [81]:
# check1 = fetch_financial_news_by_url(m[0])

def get_texts_soup_json(soup):
    json_texts = []
    script_tags = soup.find_all('script', {'type': 'application/json'})
    if script_tags:
        for script_tag in script_tags:
            json_texts.append(script_tag.text)
    return json_texts
    

def get_texts_soup_json_lrd(soup):
    json_texts = []
    script_tags = soup.find_all('script', {'type': 'application/ld+json'})
    if script_tags:
        for script_tag in script_tags:
            json_texts.append(script_tag.text)
    return json_texts


def get_texts_soup_from_p(soup):
    p_texts = []
    p_tags = soup.find_all('p')
    if p_tags:
        for p_tag in p_tags:
            p_texts.append(p_tag.text)
    return p_texts



all_texts = []
for url in m:
    url_soup = fetch_financial_news_by_url(url)
    if url_soup is not None:
        all_texts.append({
            'url':url,
            'p_texts': get_texts_soup_from_p(url_soup),
            'json_texts': get_texts_soup_json(url_soup),
            'json_lrd_texts': get_texts_soup_json_lrd(url_soup)
        })
    else:
        all_texts.append({
            'url':url,
            'p_texts': [],
            'json_texts': [],
            'json_lrd_texts': []
        })


# all_texts = [
#     {
#         'url':url,
#         'texts': [p_tag.text for p_tag in fetch_financial_news_by_url(url).find_all('p')]
#     }
#     for url in m if 
# ]

Error fetching financial news for https://www.business-standard.com/companies/news/cipla-concludes-51-18-stake-sale-in-ugandan-subsidiary-for-25-million-123111500941_1.html: 403


In [49]:
# help(check1)

In [50]:
# texts = [p_tag.text for p_tag in check1.find_all('p')]

In [69]:
# texts
import json

In [88]:
all_texts[0]['json_lrd_texts'][1]
# Use script_tag = soup.find('script', {'type': 'application/json'})
# <script type="application/ld+json">

' { "@context": "https://schema.org", "@type": "NewsArticle", "mainEntityOfPage": "https://www.livemint.com/market/live-blog/cipla-share-price-live-blog-for-17-nov-2023-11700201410087.html", "inLanguage": "en", "headline": "Cipla share price Today Live Updates : Cipla closed today at  ₹1249.35, up 1.17% from yesterday\'s  ₹1234.85 | Mint", "description": "Cipla stock price went up today, 17 Nov 2023, by 1.17 %. The stock closed at 1234.85 per share. The stock is currently trading at 1249.35 per share. Investors should monitor Cipla stock price closely in the coming days and weeks to see how it reacts to the news.", "url": "https://www.livemint.com/market/live-blog/cipla-share-price-live-blog-for-17-nov-2023-11700201410087.html", "datePublished": "2023-11-17T11:40:09+05:30", "dateModified": "2023-11-17T18:36:19+05:30", "articleBody": "Cipla\'s stock opened at  ₹1238.95 and closed at  ₹1234.85 on the last trading day. The high for the day was  ₹1256 and the low was  ₹1235.25. The market 

In [83]:
mm = '''
"2023-11-17T14:42:18+05:30", "articleBody": " Top active call options for Cipla at 17 Nov 14:42 were at strike price of ₹1250.0 (Expiry : 30 NOV 2023) & ₹1240.0 (Expiry : 30 NOV 2023) with prices ₹18.45 (+43.02%) & ₹25.0 (+44.51%) respectively.Top active put options for Cipla at 17 Nov 14:42 were at strike price of ₹1250.0 (Expiry : 30 NOV 2023) & ₹1200.0 (Expiry : 30 NOV 2023) with prices ₹14.0 (-38.46%) & ₹3.1 (-37.37%) respectively.Disclaimer: The Futures & Options data is at a delay of 15 minutes. ", "image": {
'''

In [84]:
import re

In [102]:
# Regular expression to extract the articleBody text
regex = r'"articleBody"\s*:\s*"(.*),\s*"\w+"\s*:'

regex1 = r'"articleBody"\s*:\s*"([^"]*)"'

# Extracting the articleBody text
match = re.search(regex1, all_texts[0]['json_lrd_texts'][1], flags=re.M)
extracted_text = match.group(0) if match else "No match found"
extracted_text

'"articleBody": "Cipla\'s stock opened at  ₹1238.95 and closed at  ₹1234.85 on the last trading day. The high for the day was  ₹1256 and the low was  ₹1235.25. The market cap stands at 100734.03 cr, with a 52-week high of  ₹1277.55 and a 52-week low of  ₹852. The BSE volume for the day was 25139 shares.Disclaimer: This is an AI-generated live blog and has not been edited by LiveMint staff.\n"'

In [96]:
# help(re)

In [103]:
match = re.search(regex1, mm, flags=re.M)
extracted_text = match.group(0) if match else "No match found"
extracted_text

'"articleBody": " Top active call options for Cipla at 17 Nov 14:42 were at strike price of ₹1250.0 (Expiry : 30 NOV 2023) & ₹1240.0 (Expiry : 30 NOV 2023) with prices ₹18.45 (+43.02%) & ₹25.0 (+44.51%) respectively.Top active put options for Cipla at 17 Nov 14:42 were at strike price of ₹1250.0 (Expiry : 30 NOV 2023) & ₹1200.0 (Expiry : 30 NOV 2023) with prices ₹14.0 (-38.46%) & ₹3.1 (-37.37%) respectively.Disclaimer: The Futures & Options data is at a delay of 15 minutes. "'