In [None]:
import csv
from collections import defaultdict
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
import os
import pandas as pd
from prophet import Prophet
import get_info

# Constants
# Directory that receives every artifact produced by this module; functions
# that create it do so with os.makedirs(..., exist_ok=True).
RESULT_DIR = 'Result'
# CSV export written by save_releases_to_csv.
# NOTE(review): the file name is hard-coded to TensorFlow even though
# analyze_releases accepts any owner/repo pair.
CSV_FILE_PATH = os.path.join(RESULT_DIR, 'tensorflow_releases.csv')
# Releases-per-year line chart saved by plot_release_timeline.
TIMELINE_PLOT_PATH = os.path.join(RESULT_DIR, 'release_timeline.png')
# Interval frequency chart saved by plot_release_intervals.
INTERVALS_PLOT_PATH = os.path.join(RESULT_DIR, 'release_intervals_distribution.png')
# Prophet forecast chart saved by predict_with_prophet.
FORECAST_PLOT_PATH = os.path.join(RESULT_DIR, 'release_forecast.png')

def save_releases_to_csv(releases):
    """Write the core fields of every release to ``CSV_FILE_PATH``.

    The output directory is created if needed and any existing file is
    overwritten. Only the ``tag_name``, ``published_at``, ``name`` and
    ``body`` entries of each release are exported; other keys are dropped.

    Args:
        releases: Iterable of mappings that each provide the four exported
            keys (a missing key raises ``KeyError``).
    """
    os.makedirs(RESULT_DIR, exist_ok=True)
    columns = ['tag_name', 'published_at', 'name', 'body']

    with open(CSV_FILE_PATH, 'w', newline='', encoding='utf-8') as out:
        sheet = csv.DictWriter(out, fieldnames=columns)
        sheet.writeheader()
        # Project each release down to the exported columns; rows are
        # produced lazily, one per release, in input order.
        sheet.writerows(
            {column: entry[column] for column in columns}
            for entry in releases
        )

def calculate_release_intervals(releases):
    """Calculate the day gaps between chronologically consecutive releases.

    Publication timestamps are sorted before differencing, so the result is
    independent of the order of ``releases``. Without the sort, a
    newest-first list would yield negative (and floor-skewed) day counts.

    Args:
        releases: Iterable of mappings, each with a ``'published_at'`` string
            formatted as ``%Y-%m-%dT%H:%M:%SZ``.

    Returns:
        A list of non-negative whole-day intervals, oldest gap first. The
        list is empty when fewer than two releases are given.

    Raises:
        ValueError: If a timestamp does not match the expected format.
    """
    published = sorted(
        datetime.strptime(release['published_at'], '%Y-%m-%dT%H:%M:%SZ')
        for release in releases
    )
    # Pair each timestamp with its successor; ``.days`` truncates the gap to
    # whole days, which is exact here because every gap is non-negative.
    return [
        (later - earlier).days
        for earlier, later in zip(published, published[1:])
    ]

def analyze_release_intervals(intervals):
    """Summarize release intervals as ``(average, minimum, maximum)``.

    Args:
        intervals: Sized collection of numeric intervals (e.g. day counts).

    Returns:
        A 3-tuple ``(avg_interval, min_interval, max_interval)``. When
        ``intervals`` is empty (fewer than two releases) there is nothing to
        summarize and ``(None, None, None)`` is returned instead of raising
        ``ZeroDivisionError``.
    """
    if len(intervals) == 0:
        return None, None, None
    avg_interval = sum(intervals) / len(intervals)
    return avg_interval, min(intervals), max(intervals)

def plot_release_timeline(releases):
    """Draw a releases-per-year line chart and save it to ``TIMELINE_PLOT_PATH``.

    Each release is bucketed by the calendar year of its ``'published_at'``
    timestamp (``%Y-%m-%dT%H:%M:%SZ``). Every data point is labelled with its
    count. The output directory is created if it does not exist.

    Args:
        releases: Iterable of mappings that each carry a ``'published_at'``
            string.
    """
    os.makedirs(RESULT_DIR, exist_ok=True)

    # Tally releases by publication year.
    releases_by_year = defaultdict(int)
    for entry in releases:
        published = datetime.strptime(entry['published_at'], '%Y-%m-%dT%H:%M:%SZ')
        releases_by_year[published.year] += 1

    years = sorted(releases_by_year)
    counts = [releases_by_year[y] for y in years]

    plt.figure(figsize=(10, 8))
    plt.plot(years, counts, marker='o', linestyle='-')
    plt.title('TensorFlow Releases per Year')
    plt.xlabel('Year')
    plt.ylabel('Number of Releases')
    plt.grid(True)
    plt.tight_layout()

    # Annotate each point with its release count.
    for x_pos, y_pos in zip(years, counts):
        plt.text(x_pos, y_pos, str(y_pos))

    plt.savefig(TIMELINE_PLOT_PATH)
    plt.close()

def plot_release_intervals(intervals):
    """Plot how often each release interval occurs and save the chart.

    The chart is written to ``INTERVALS_PLOT_PATH``. The output directory is
    created if it does not exist, so this function can be called on its own.
    With no intervals an empty (axes-only) chart is saved rather than raising.

    Args:
        intervals: Iterable of interval lengths in days.
    """
    os.makedirs(RESULT_DIR, exist_ok=True)

    interval_counts = defaultdict(int)
    for interval in intervals:
        interval_counts[interval] += 1

    if interval_counts:
        # Transpose sorted (interval, count) pairs into x and y series.
        days, frequencies = zip(*sorted(interval_counts.items()))
    else:
        # zip(*[]) yields nothing to unpack, so fall back to empty series.
        days, frequencies = (), ()

    plt.figure(figsize=(10, 6))
    plt.plot(days, frequencies, marker='o', linestyle='-', color='b')
    plt.title('Distribution of Release Intervals')
    plt.xlabel('Interval (days)')
    plt.ylabel('Frequency')
    plt.grid(True)
    plt.savefig(INTERVALS_PLOT_PATH)
    plt.close()

def predict_future_releases(releases, num_future_releases=5):
    """Predict future release dates by extrapolating the average interval.

    Releases are ordered chronologically first, so the most recent release
    and the average gap are both correct regardless of input order.
    Previously a newest-first list combined ``releases[0]`` with a negative
    average interval, so the "predictions" stepped backwards in time.

    Args:
        releases: Iterable of mappings, each with a ``'published_at'`` string
            formatted as ``%Y-%m-%dT%H:%M:%SZ``.
        num_future_releases: How many dates to predict.

    Returns:
        A list of ``'YYYY-MM-DD'`` strings, each one average interval after
        the previous, starting from the latest release. The list is empty
        when fewer than two releases are available, since no interval can be
        computed.
    """
    date_format = '%Y-%m-%dT%H:%M:%SZ'
    chronological = sorted(
        releases,
        key=lambda release: datetime.strptime(release['published_at'], date_format),
    )

    intervals = calculate_release_intervals(chronological)
    if not intervals:
        return []

    step = timedelta(days=sum(intervals) / len(intervals))
    next_release_date = datetime.strptime(
        chronological[-1]['published_at'], date_format
    )

    future_releases = []
    for _ in range(num_future_releases):
        next_release_date += step
        future_releases.append(next_release_date.strftime('%Y-%m-%d'))
    return future_releases

def predict_with_prophet(releases):
    """Forecast daily release counts for the next year with Prophet.

    Releases are aggregated into a daily count series, a Prophet model with a
    95% uncertainty interval is fitted to it, and a 365-day forecast is
    produced. The forecast chart is saved to ``FORECAST_PLOT_PATH``; the
    output directory is created if it does not exist.

    Args:
        releases: Iterable of mappings, each with a ``'published_at'`` string
            formatted as ``%Y-%m-%dT%H:%M:%SZ``.

    Returns:
        The forecast object returned by ``model.predict`` (history plus 365
        future daily rows).
    """
    data = [
        {'ds': datetime.strptime(release['published_at'], '%Y-%m-%dT%H:%M:%SZ'), 'y': 1}
        for release in releases
    ]
    df = pd.DataFrame(data)
    df['ds'] = pd.to_datetime(df['ds'])
    df = df.sort_values(by='ds').reset_index(drop=True)
    # Collapse to one row per calendar day; days without a release get y == 0.
    df = df.resample('D', on='ds').sum().reset_index()

    model = Prophet(interval_width=0.95)
    model.fit(df)

    future = model.make_future_dataframe(periods=365)
    forecast = model.predict(future)

    fig = model.plot(forecast)
    plt.title('Release Date Forecast')
    plt.xlabel('Date')
    plt.ylabel('Releases')
    # Ensure the target directory exists so the function works standalone,
    # and save/close the forecast figure explicitly rather than whichever
    # figure happens to be current.
    os.makedirs(RESULT_DIR, exist_ok=True)
    fig.savefig(FORECAST_PLOT_PATH)
    plt.close(fig)

    return forecast

def analyze_releases(owner, repo):
    """Run the full release analysis pipeline for one GitHub repository.

    Fetches the repository's releases through ``get_info.get_info``
    (presumably a list of release dicts; verify against that module), then
    exports them to CSV, renders the timeline and interval charts, and
    produces both the average-interval and the Prophet forecasts.

    Args:
        owner: GitHub account or organization that owns the repository.
        repo: Repository name.

    Returns:
        A 6-tuple ``(csv_path, timeline_plot_path, intervals_plot_path,
        forecast_plot_path, future_releases, last_ten_forecasts)``. All six
        entries are ``None`` when no releases were retrieved.
    """
    releases = get_info.get_info(
        f'https://api.github.com/repos/{owner}/{repo}/releases'
    )

    # Guard clause: nothing to analyze.
    if not releases:
        print('No release information found.')
        return (None,) * 6

    save_releases_to_csv(releases)
    plot_release_timeline(releases)

    gaps = calculate_release_intervals(releases)
    # The summary statistics are computed but not part of the return value.
    _mean_gap, _shortest_gap, _longest_gap = analyze_release_intervals(gaps)
    plot_release_intervals(gaps)

    predicted_dates = predict_future_releases(releases)
    recent_forecast_rows = predict_with_prophet(releases).tail(10)

    return (
        CSV_FILE_PATH,
        TIMELINE_PLOT_PATH,
        INTERVALS_PLOT_PATH,
        FORECAST_PLOT_PATH,
        predicted_dates,
        recent_forecast_rows,
    )

# Example usage
if __name__ == "__main__":
    owner = "tensorflow"
    repo = "tensorflow"
    (
        csv_path,
        timeline_path,
        intervals_path,
        forecast_path,
        future_releases,
        last_ten_forecasts,
    ) = analyze_releases(owner, repo)
    # analyze_releases returns all-None (after printing its own notice) when
    # no releases were found; avoid reporting files that were never written.
    if csv_path is not None:
        print(f"CSV saved at: {csv_path}")
        print(f"Timeline plot saved at: {timeline_path}")
        print(f"Intervals plot saved at: {intervals_path}")
        print(f"Forecast plot saved at: {forecast_path}")
        print(f"Predicted future releases: {future_releases}")
        print(f"Last 10 forecasts: {last_ten_forecasts}")