### get_season_data

In [8]:
# %run import_modules.ipynb
# configuration = cfbd.Configuration()
# configuration.api_key['Authorization'] = api_key
# configuration.api_key_prefix['Authorization']='Bearer'
# api_instance = cfbd.GamesApi(cfbd.ApiClient(configuration))
# %run process_game.ipynb
# %run extract_team_data.ipynb
# %run fetch_data_with_retries.ipynb

In [9]:
# Function to get season data
def get_season_data(year, start_week, end_week):
    """
    Retrieves and processes data for a whole season.

    :param year: Year of the game data.
    :param start_week: The first week of the season.
    :param end_week: The last week of the season.
    :return: DataFrame containing extracted team statistics for the whole season.
    """
    all_data = []

    # Function to process data for a specific week
    def process_week(week):
        print(f"Processing week {week} of {year}")
        try:
            return extract_team_data(year, week)
        except Exception as e:
            print(f"An error occurred while processing week {week}: {e}")
            return pd.DataFrame()  # Return an empty DataFrame in case of failure
    
    # Use parallel processing to handle multiple weeks
    with ThreadPoolExecutor(max_workers=5) as executor:  # Adjust max_workers as needed
        futures = [executor.submit(process_week, week) for week in range(start_week, end_week + 1)]
        for future in as_completed(futures):
            try:
                week_data = future.result()
                if not week_data.empty:
                    all_data.append(week_data)
            except Exception as e:
                print(f"An error occurred: {e}")

    # Combine all data into a single DataFrame
    combined_df = pd.concat(all_data, ignore_index=True).fillna(0)  # Replace NaNs with 0 if needed
    return combined_df

In [20]:
teamStats20=get_season_data(year=2020,start_week=1,end_week=15)

Processing week 1 of 2020
Processing week 2 of 2020
Processing week 3 of 2020
Processing week 4 of 2020
Processing week 5 of 2020
Processing week 6 of 2020
Processing week 7 of 2020
Processing week 8 of 2020
Processing week 9 of 2020
Processing week 10 of 2020
Processing week 11 of 2020
Processing week 12 of 2020
Processing week 13 of 2020
Processing week 14 of 2020
Processing week 15 of 2020


In [21]:
teamStats20.loc[teamStats20.team_name=='Michigan']

Unnamed: 0,game_id,team_id,team_name,home_away,points,week,tacklesForLoss,defensiveTDs,tackles,sacks,...,completionAttempts,netPassingYards,totalYards,fourthDownEff,thirdDownEff,firstDowns,puntReturnYards,puntReturnTDs,puntReturns,totalFumbles
421,401247294,130,Michigan,away,49,8,8,1,45,5,...,15-25,225,478,0-1,4-8,18,0,0,0,0
506,401247305,130,Michigan,home,24,9,2,0,16,0,...,32-52,300,452,1-1,7-17,28,31,0,2,0
597,401247319,130,Michigan,home,11,11,2,0,42,1,...,13-26,172,219,0-1,3-11,10,0,0,0,0
697,401247309,130,Michigan,away,21,10,3,0,53,0,...,18-34,344,357,0-0,3-11,17,14,0,2,0
782,401247324,130,Michigan,away,48,12,3,1,41,2,...,32-48,349,497,2-3,6-16,29,0,0,1,0
799,401247333,130,Michigan,home,17,13,2,0,42,1,...,13-28,112,286,1-3,4-12,14,2,0,2,0


In [23]:
# con=sqlite3.connect('collegeFootball.db')
# teamStats20.to_sql(name='teamStats20',con=con)
# con.commit()
# con.close()
# con=sqlite3.connect('collegeFootball.db')
# p3=pd.read_sql("SELECT * FROM teamStats20",con=con)