<a href="https://colab.research.google.com/github/vvvvvvss/StockMarketManupilationSystem/blob/main/Stock_ManupilationV1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# MARCH PROGRESS

# Real-time data processing and analysis

In [None]:
pip install aiohttp pandas confluent-kafka



Exploring what kind of data can be fetched from the Alpha Vantage API.

In [None]:
import requests
import pandas as pd
import time

import os

# SECURITY: the literal below is an API key committed together with this
# notebook. It is kept only as a fallback so existing runs keep working; set
# the ALPHAVANTAGE_API_KEY environment variable to override it, and rotate the
# committed key.
API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY", "QT13WY791JO16QMJ")
# Single query endpoint; the requested dataset is selected via query parameters.
BASE_URL = "https://www.alphavantage.co/query"

def fetch_stock_data(symbol, interval="5min"):
    """Fetch intraday bars for ``symbol`` from the Alpha Vantage query endpoint.

    Args:
        symbol: Ticker symbol sent as the ``symbol`` query parameter.
        interval: Bar size sent as the ``interval`` query parameter. It also
            selects the ``"Time Series (<interval>)"`` section read from the
            reply.

    Returns:
        A DataFrame with one row per bar: a ``timestamp`` column followed by
        the fields of each bar exactly as the API sent them (values are not
        converted to numbers). ``None`` when the request fails, the HTTP
        status is an error, the body is not JSON, the API reports an
        ``"Error Message"``, or the expected time-series section is missing.
        A short message is printed in each of those cases.
    """
    params = {
        "function": "TIME_SERIES_INTRADAY",
        "symbol": symbol,
        "interval": interval,
        "apikey": API_KEY,
        "outputsize": "compact"
    }

    try:
        # Without a timeout a stalled connection would hang the cell forever.
        response = requests.get(BASE_URL, params=params, timeout=30)
        response.raise_for_status()
        # A non-JSON body surfaces as a ValueError subclass.
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Error fetching data for {symbol}: {exc}")
        return None

    if "Error Message" in data:
        print(f"Error fetching data for {symbol}: {data['Error Message']}")
        return None

    series_key = f"Time Series ({interval})"
    if series_key not in data:
        # Any other payload shape is reported verbatim so the reason is visible.
        print(f"Unexpected data format for {symbol}: {data}")
        return None

    # Outer keys (the bar timestamps) become the index, then are moved into a
    # regular column named "timestamp".
    return (
        pd.DataFrame.from_dict(data[series_key], orient="index")
        .reset_index()
        .rename(columns={"index": "timestamp"})
    )

# Quick smoke test of the fetcher: preview the first rows for one ticker, or
# report the failure (fetch_stock_data returns None when it cannot build a frame).
stock_data = fetch_stock_data("AAPL")
print(stock_data.head() if stock_data is not None else "Could not retrieve stock data.")

             timestamp   1. open   2. high    3. low  4. close 5. volume
0  2025-03-25 19:55:00  224.1300  224.3300  224.1000  224.2400      3316
1  2025-03-25 19:50:00  224.2000  224.3300  224.0700  224.1000       738
2  2025-03-25 19:45:00  224.1500  224.3300  224.0700  224.2800      2743
3  2025-03-25 19:40:00  224.1500  224.1500  224.0700  224.0700       834
4  2025-03-25 19:35:00  224.1000  224.1500  224.0500  224.0701       546


# Data Collection – Rough Draft

1.   Fetch trading data from Alpha Vantage
2.   Detect potential market manipulation using Isolation Forest
3.   Mock implementation of social media sentiment collection



In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
import requests

class MarketManipulationDetector:
    """Rough detector that flags unusual trading days for one symbol.

    Typical use: call ``fetch_trading_data`` to load daily bars from Alpha
    Vantage, then ``detect_anomalous_trading`` to mark outliers with an
    Isolation Forest. ``collect_social_sentiment`` is a stub that returns
    fixed numbers.
    """

    # Endpoint queried by fetch_trading_data.
    _QUERY_URL = "https://www.alphavantage.co/query"
    # Seconds to wait for the HTTP response before giving up.
    _REQUEST_TIMEOUT = 30

    def __init__(self, alpha_vantage_key):
        """Store the API key; no network access happens here.

        Args:
            alpha_vantage_key: Key sent as the ``apikey`` query parameter.
        """
        self.alpha_vantage_key = alpha_vantage_key
        # Daily bar frame filled by fetch_trading_data(); None until loaded.
        self.trading_data = None
        # Initialised here but not assigned anywhere else in this class.
        self.sentiment_data = None

    def fetch_trading_data(self, symbol):
        """Load daily bars for ``symbol`` into ``self.trading_data``.

        On success ``self.trading_data`` is a float DataFrame indexed by the
        date strings of the response, with columns ``open``, ``high``,
        ``low``, ``close`` and ``volume``. If the response is not HTTP 200 or
        has no ``"Time Series (Daily)"`` section, a message is printed and
        ``self.trading_data`` stays ``None``, so ``detect_anomalous_trading``
        reports "Trading data not loaded" instead of failing on an empty
        frame.

        Args:
            symbol: Ticker symbol to request.
        """
        # Drop any previously loaded frame so a failed fetch cannot leave the
        # bars of an earlier symbol behind.
        self.trading_data = None

        response = requests.get(
            self._QUERY_URL,
            # Passing params lets requests URL-encode the symbol and the key.
            params={
                "function": "TIME_SERIES_DAILY",
                "symbol": symbol,
                "apikey": self.alpha_vantage_key,
            },
            timeout=self._REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            print(f"Error fetching data for {symbol}: HTTP {response.status_code}")
            return

        raw_data = response.json()
        daily = raw_data.get('Time Series (Daily)') if isinstance(raw_data, dict) else None
        if not daily:
            # Building a frame from an empty section yields zero columns, and
            # naming five columns on it raises a length-mismatch ValueError.
            print(f"Unexpected data format for {symbol}: {raw_data}")
            return

        frame = pd.DataFrame.from_dict(daily, orient='index')
        # Field names are expected to look like "1. open"; strip the numeric
        # prefix by name instead of relabelling columns by position.
        frame = frame.rename(columns=lambda name: name.split('. ', 1)[-1])
        self.trading_data = frame.astype(float)

    def detect_anomalous_trading(self, contamination=0.1):
        """Flag outlier days based on ``volume`` and ``close``.

        Adds (or overwrites) a boolean ``is_anomaly`` column on
        ``self.trading_data`` and returns only the flagged rows.

        Args:
            contamination: Expected share of outliers passed to
                ``IsolationForest``. The default 0.1 matches the original
                hard-coded value.

        Returns:
            The rows of ``self.trading_data`` where ``is_anomaly`` is True.

        Raises:
            ValueError: If no trading data has been loaded.
        """
        if self.trading_data is None or self.trading_data.empty:
            raise ValueError("Trading data not loaded")

        features = ['volume', 'close']
        # Standardise the two features before fitting the model.
        X_scaled = StandardScaler().fit_transform(self.trading_data[features])

        # random_state is fixed so repeated runs flag the same rows.
        clf = IsolationForest(contamination=contamination, random_state=42)
        # fit_predict labels outliers as -1.
        self.trading_data['is_anomaly'] = clf.fit_predict(X_scaled) == -1

        return self.trading_data[self.trading_data['is_anomaly']]

    def collect_social_sentiment(self, symbol):
        """Return placeholder sentiment shares (no StockTwits API call is made).

        Args:
            symbol: Accepted for interface compatibility; currently ignored.

        Returns:
            A dict with fixed ``bullish``, ``bearish`` and ``neutral`` values.
        """
        fake_sentiments = {
            'bullish': 0.6,
            'bearish': 0.3,
            'neutral': 0.1
        }
        return fake_sentiments

def main():
    """Run the rough end-to-end demo for the INFY ticker.

    Loads daily bars, prints the rows the detector flags as anomalous, then
    prints the placeholder sentiment figures.
    """
    import os  # local import: os is not among this cell's imports

    # SECURITY: the literal is an API key committed with this notebook. It is
    # kept only as a fallback; set ALPHAVANTAGE_API_KEY to override it, and
    # rotate the committed key.
    api_key = os.environ.get("ALPHAVANTAGE_API_KEY", 'QT13WY791JO16QMJ')

    detector = MarketManipulationDetector(alpha_vantage_key=api_key)
    detector.fetch_trading_data('INFY')

    anomalies = detector.detect_anomalous_trading()
    print("Potential Manipulative Trading Days:")
    print(anomalies)

    sentiment = detector.collect_social_sentiment('INFY')
    print("\nSocial Media Sentiment:")
    print(sentiment)

# Run the demo when this cell/script is executed directly.
if __name__ == "__main__":
    main()

Potential Manipulative Trading Days:
             open    high      low  close      volume  is_anomaly
2025-03-28  18.37  18.482  18.1050  18.17   7866062.0        True
2025-03-27  18.70  18.780  18.5950  18.67   6249534.0        True
2025-03-21  18.41  18.430  18.1700  18.32  18677618.0        True
2025-03-20  18.33  18.390  17.9001  18.06  19376214.0        True
2025-03-13  18.50  18.585  18.2600  18.29  10913566.0        True
2025-03-12  18.49  18.645  18.3400  18.50  15292391.0        True
2025-03-11  19.13  19.200  18.8100  18.97  17695135.0        True
2025-01-16  22.60  22.600  21.3100  21.57  22922717.0        True
2024-12-19  23.18  23.620  23.1000  23.42   9178696.0        True
2024-12-13  23.52  23.630  23.2800  23.40   4443501.0        True

Social Media Sentiment:
{'bullish': 0.6, 'bearish': 0.3, 'neutral': 0.1}


In [None]:
!pip install alpha_vantage

Collecting alpha_vantage
  Downloading alpha_vantage-3.0.0-py3-none-any.whl.metadata (12 kB)
Downloading alpha_vantage-3.0.0-py3-none-any.whl (35 kB)
Installing collected packages: alpha_vantage
Successfully installed alpha_vantage-3.0.0


# Main code

In [None]:
import requests
import pandas as pd
import json
from textblob import TextBlob
from alpha_vantage.timeseries import TimeSeries


# Stock data collection
import os

# SECURITY: the literal below is an API key committed with this notebook. It is
# kept only as a fallback so existing runs keep working; set the
# ALPHAVANTAGE_API_KEY environment variable to override it, and rotate the
# committed key.
ALPHA_VANTAGE_API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY", "ED3T9IQN5OD495QC")
STOCK_SYMBOL = "AAPL"

# Download the compact daily series as a DataFrame (meta_data is not used
# further in this cell).
ts = TimeSeries(key=ALPHA_VANTAGE_API_KEY, output_format='pandas')
data, meta_data = ts.get_daily(symbol=STOCK_SYMBOL, outputsize='compact')


data.to_csv("stock_data.csv") # storing stock data as a CSV file
print("Stock data saved successfully.")

# StockTwits Data
def fetch_stocktwits_data(symbol, timeout=10):
    """Fetch the StockTwits message stream for ``symbol``.

    Args:
        symbol: Ticker symbol inserted into the stream URL.
        timeout: Seconds to wait for the HTTP response (default 10).

    Returns:
        The decoded JSON payload when the server answers with HTTP 200;
        ``None`` when the request fails, the status is not 200, or the body
        is not valid JSON.
    """
    url = f"https://api.stocktwits.com/api/2/streams/symbol/{symbol}.json"
    try:
        # A timeout keeps a stalled connection from hanging the cell.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error in fetching StockTwits data: {exc}")
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # Body was not JSON; treat it like any other failed fetch.
        return None

def analyze_sentiment(messages):
    """Score each message body with TextBlob polarity.

    Args:
        messages: Iterable of dict-like messages. ``body`` and ``created_at``
            are read with defaults, so a message missing either key no
            longer raises ``KeyError``.

    Returns:
        A list with one dict per message, in input order, holding
        ``timestamp`` (``'N/A'`` when absent), ``text`` (``''`` when absent)
        and ``sentiment_score``.
    """
    sentiments = []
    for msg in messages:
        text = msg.get('body', '')
        sentiment = TextBlob(text).sentiment.polarity
        sentiments.append({
            'timestamp': msg.get('created_at', 'N/A'),
            'text': text,
            'sentiment_score': sentiment
        })
    return sentiments

# Fetch the StockTwits stream for one symbol, score every message and save the
# scores to sentiment_data.csv.
stocktwits_data = fetch_stocktwits_data("TCS")
if not stocktwits_data:
    print("Failed to fetch StockTwits data.")
elif 'messages' not in stocktwits_data:
    # A payload without a message list would otherwise raise KeyError below.
    print("No messages found in StockTwits data.")
else:
    messages = stocktwits_data['messages']
    sentiment_analysis = analyze_sentiment(messages)
    df_sentiment = pd.DataFrame(sentiment_analysis)
    df_sentiment.to_csv("sentiment_data.csv", index=False)
    print("Sentiment data saved successfully.")


def analyze_news_sentiment(news_text):
    """Return the TextBlob polarity score of ``news_text``."""
    blob = TextBlob(news_text)
    return blob.sentiment.polarity

# Sanity-check the news scorer on one fixed headline. The recorded run of this
# cell printed a score of 0.0 for this sentence.
news_text_sample = "Stock markets rally as tech stocks soar."
print("Sample News Sentiment Score:", analyze_news_sentiment(news_text_sample))


Stock data saved successfully.
Failed to fetch StockTwits data.
Sample News Sentiment Score: 0.0


In [None]:
!pip install feedparser

Collecting feedparser
  Downloading feedparser-6.0.11-py3-none-any.whl.metadata (2.4 kB)
Collecting sgmllib3k (from feedparser)
  Downloading sgmllib3k-1.0.0.tar.gz (5.8 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: sgmllib3k
  Building wheel for sgmllib3k (setup.py) ... [?25l[?25hdone
  Created wheel for sgmllib3k: filename=sgmllib3k-1.0.0-py3-none-any.whl size=6047 sha256=208730de486f11e99ae97bbc567df4a5a2853405ec2ad5fb060084893a8550ad
  Stored in directory: /root/.cache/pip/wheels/3b/25/2a/105d6a15df6914f4d15047691c6c28f9052cc1173e40285d03
Successfully built sgmllib3k
Installing collected packages: sgmllib3k, feedparser
Successfully installed feedparser-6.0.11 sgmllib3k-1.0.0


# Data Collection
1. Fetch daily stock data using the Alpha Vantage API
2. Fetch StockTwits messages and analyze their sentiment
3. Fetch the top Google News headlines for a stock via RSS
4. Analyze sentiment of news text



In [None]:
import requests
import pandas as pd
import json
from textblob import TextBlob
from alpha_vantage.timeseries import TimeSeries
import feedparser

import os

# SECURITY: the literal below is an API key committed with this notebook. It is
# kept only as a fallback so existing runs keep working; set the
# ALPHAVANTAGE_API_KEY environment variable to override it, and rotate the
# committed key.
ALPHA_VANTAGE_API_KEY = os.environ.get("ALPHAVANTAGE_API_KEY", "ED3T9IQN5OD495QC")
STOCK_SYMBOL = "AAPL"
# Template for the per-symbol StockTwits stream URL; fill with .format(symbol=...).
STOCKTWITS_API_URL = "https://api.stocktwits.com/api/2/streams/symbol/{symbol}.json"

def fetch_stock_data(symbol, api_key):
    """Download the compact daily series for ``symbol`` and save it as CSV.

    Args:
        symbol: Ticker symbol passed to ``TimeSeries.get_daily``.
        api_key: Alpha Vantage key used to build the ``TimeSeries`` client.

    Returns:
        The data object returned by ``get_daily`` after it has been written to
        ``stock_data.csv``; ``None`` if any step raises (the error is printed).
    """
    try:
        client = TimeSeries(key=api_key, output_format='pandas')
        # get_daily returns a (data, metadata) pair; only the data is used.
        daily_bars, _meta = client.get_daily(symbol=symbol, outputsize='compact')
        daily_bars.to_csv("stock_data.csv")
        print(f"\nStock data for {symbol} saved successfully.")
    except Exception as e:
        # Best-effort by design: report the problem and signal failure with None.
        print(f"Error fetching stock data: {e}")
        return None
    return daily_bars

def fetch_stocktwits_data(symbol, timeout=10):
    """Fetch the StockTwits message stream for ``symbol``.

    Args:
        symbol: Ticker symbol substituted into ``STOCKTWITS_API_URL``.
        timeout: Seconds to wait for the HTTP response (default 10).

    Returns:
        The decoded JSON payload on HTTP 200; ``None`` on any other status or
        if any step raises. A diagnostic line is printed in every case.
    """
    try:
        # NOTE(review): browser-like User-Agent — presumably needed because
        # StockTwits rejects the default client identifier; confirm.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        url = STOCKTWITS_API_URL.format(symbol=symbol)
        # A timeout keeps a stalled connection from hanging the cell; a
        # timeout error is handled by the except clause below.
        response = requests.get(url, headers=headers, timeout=timeout)

        if response.status_code == 200:
            data = response.json()
            print(f"Successfully fetched StockTwits data for {symbol}")
            return data
        else:
            print(f"Failed to fetch StockTwits data. Status code: {response.status_code}")
            print(f"Response content: {response.text}")
            return None
    except Exception as e:
        print(f"Error in fetching StockTwits data: {e}")
        return None

def analyze_sentiment(messages):
    """Score every message body with TextBlob polarity.

    Args:
        messages: Iterable of dict-like messages; ``body`` defaults to ``''``
            and ``created_at`` to ``'N/A'`` when missing.

    Returns:
        A list, in input order, of dicts with ``timestamp``, ``text`` and
        ``sentiment_score`` keys.
    """
    def _score(message):
        # Build the output record for a single message.
        body = message.get('body', '')
        polarity = TextBlob(body).sentiment.polarity
        return {
            'timestamp': message.get('created_at', 'N/A'),
            'text': body,
            'sentiment_score': polarity,
        }

    return [_score(message) for message in messages]

def get_google_news_rss(stock_name, max_items=5):
    """Return the first Google News RSS results for "<stock_name> stock".

    Args:
        stock_name: Company or ticker text to search for. It is URL-encoded,
            so names containing spaces or characters such as ``&`` produce a
            valid query string.
        max_items: Maximum number of entries to return (default 5).

    Returns:
        A list of ``{"title": ..., "link": ...}`` dicts in feed order; empty
        when the parsed feed has no entries.
    """
    from urllib.parse import quote_plus  # local import: not among this cell's imports

    # quote_plus encodes spaces as "+", so a plain ticker such as "TCS" yields
    # the same URL as before ("...?q=TCS+stock").
    query = quote_plus(f"{stock_name} stock")
    feed = feedparser.parse(f"https://news.google.com/rss/search?q={query}")

    return [
        {"title": entry.title, "link": entry.link}
        for entry in feed.entries[:max_items]
    ]

# Runs when the cell executes (outside main()): fetch the headlines for "TCS"
# and print each title with its link, each entry preceded by a newline.
news_data = get_google_news_rss("TCS")
for news in news_data:
    print("\n",news["title"], "-", news["link"])

def analyze_news_sentiment(news_data):
    """Return the TextBlob polarity score of the text in ``news_data``."""
    blob = TextBlob(news_data)
    return blob.sentiment.polarity

def main():
    """Run the data-collection steps for ``STOCK_SYMBOL``.

    Saves the daily stock data, scores and saves the StockTwits messages when
    the payload contains any, then prints the sentiment score of a fixed
    sample headline.
    """
    # The returned frame is not used here; the call is kept for its CSV output.
    fetch_stock_data(STOCK_SYMBOL, ALPHA_VANTAGE_API_KEY)
    stocktwits_data = fetch_stocktwits_data(STOCK_SYMBOL)

    has_messages = bool(stocktwits_data) and 'messages' in stocktwits_data
    if not has_messages:
        print("No messages found in StockTwits data.")
    else:
        scored = analyze_sentiment(stocktwits_data['messages'])
        pd.DataFrame(scored).to_csv("sentiment_data.csv", index=False)
        print("Sentiment data saved successfully.")

    sample_headline = "Stock markets rally as tech stocks soar."
    print("Sample News Sentiment Score:", analyze_news_sentiment(sample_headline))

# Run the pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    main()


 TCS, Infosys, HDFC Bank, HCL Tech among 5 key stocks to declare interim dividend in April 2025 - Mint - https://news.google.com/rss/articles/CBMi8wFBVV95cUxQOXJ0TEd1UWVmTXRSS0UxYThjNnJjaS1SWWJReGdLS1RYMVAwR2pwaE00OExTQzhKeFdUckVYTlh3dFNZQ19vQkV2TG5zdVRqd19ZVXVaRlF4TFk4MGZBR2EtV3YzZXI5RkNzU01icHZCbVo0YTdhemstVU1EN2wwaUdUYnFINk5DRTlsbk95UTJfeFQ0WUYyVkxFRFZKaG5Cc212QVFycjJxVFZVRkZuN09oQjBPRF9fTmc5RzlrZG1tMW1HMjJuUHpHWWJSU3RtT2NsOWpPbkJtQ3hxMUJWRTA5c0lnLTE3WXhGS21tXzQ3OUXSAfgBQVVfeXFMTXd6aFRBUEROLW95SDdmeVU0dldqREJxdGpIOUJYV2x4XzFYS21zNlNZa3JmR1o5ZXpYVEQxSVB3NVVBblRWb0haenFVSGJqdHFONzllUlFXcVhINlplN2Vicm5kQS1oZlZIbFo5ZHltNVVscDBIdXl0Nm5EY1Zud3UtRmZMQmF4SHZlaWo4NGdCVEFDU1RyNlUtcDE2cGxTdGdfR01TdGFrTG8wVU93TlhLTktFNXVHZE50ejN0VUNTNnNxRzh4RmlwejJQNkdoTmNFUWtYZTYtcmR3cGdhazdibnJ6a0pmS3pyR0s1TGU3WHhiZTE3Tjg?oc=5

 32% target price slash! Goldman flags big risks for TCS, Infosys & other IT stocks amid US worries - The Economic Times - https://news.google.com/rss/articles/CBMi3AFBVV95cUxPdVdENUl