In [1]:
# %pip install requests
# %pip install beautifulsoup4
# %pip install selenium
# %pip install pandas
# %pip install geopy
# %pip install geopandas
# %pip install folium
# %pip install openmeteo-requests
# %pip install requests-cache retry-requests numpy pandas
# %pip install timezonefinder
# % pip install seaborn

In [2]:
import os
import time
import requests
import pandas as pd
import geopandas as gpd
import folium
import openmeteo_requests
import requests_cache
import sqlite3
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import json
import re
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from geopy.geocoders import Nominatim
from geopy.geocoders import OpenCage
from folium import Marker
from folium import GeoJson
from retry_requests import retry
from timezonefinder import TimezoneFinder 

Function Breakdown Section 4
---
Vintage_Dataframe
- grabs two dataframes based on the URLs pulled from the popular producers of the specific wines
- one for all the wine vintage data, and one based on popular stats for the wine 

Final_Vintage_DataFrame 
- creates a dataframe with vintage data
- included here critically is the ratings data tied to the specific producers

Converted_Weather_Data
- converts output dataframe from All_Weather_Data into a dataframe indexed across producers and years 
- the columns are monthly averages based on the daily data
- adds in the rating and price data for the specific producer of a specific wine for a specific year 
- final form set up to allow easier training on a model 

In [3]:
def Vintage_Dataframe(popular_wines_df):
    """
    Input: takes in a dataframe of popular wines, which includes a column with the URL for each of the wines
    Output: two dataframes 
        Recommended vintages: which has the data on the specific wines chosen 
        All Vintages: which has data on all the wines
    """
    #Initialize lists to store the extracted data
    all_recommended_vintages = []
    all_vintages_data = []

    #Stips each of the URLs so they are in proper form 
    popular_wines_df['URL'] = popular_wines_df['URL'].str.split('?').str[0]

    #Loop row in the DataFrame
    for index, row in popular_wines_df.iterrows():

        #Grabs each URL and strips and remaining white space 
        url = row['URL'].strip() 
        
        try:
            r = requests.get(url, headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0"
            })

            #Check if the request was successful
            if r.status_code == 200:
                
                #Search for the JavaScript data in the page source
                res = re.search(r"window\.__PRELOADED_STATE__\.winePageInformation\s*=\s*(.*});", r.text, re.MULTILINE)
                
                if res:
                    data = json.loads(res.group(1))
                    
                    #Extract recommended vintages
                    recommended_vintages = data.get("recommended_vintages", [])
                    if recommended_vintages:
                        recommended_df = pd.DataFrame(recommended_vintages)

                        #Add the URL for reference
                        recommended_df['source_url'] = url  
                        all_recommended_vintages.append(recommended_df)

                    #Extract all vintages
                    all_vintages = data.get("wine", {}).get("vintages", [])
                    if all_vintages:
                        all_vintages_df = pd.DataFrame(all_vintages)

                        #Add the URL for reference
                        all_vintages_df['source_url'] = url  
                        all_vintages_data.append(all_vintages_df)

                else:
                    print(f"No data found for URL: {url}")
            else:
                print(f"Failed to retrieve data for URL: {url}, Status code: {r.status_code}")
        
        except Exception as e:
            print(f"An error occurred for URL: {url} - {str(e)}")

    #Concats all the recommended vintages and all vintages data into DataFrames
    recommended_vintages_df = pd.concat(all_recommended_vintages, ignore_index=True)
    all_vintages_df = pd.concat(all_vintages_data, ignore_index=True)
   
    return recommended_vintages_df, all_vintages_df

In [106]:
def Final_Vintage_Dataframe(recommended_vintages_df, all_vintages_df, popular_wines_df, lower_date_bound = 1900, lower_rating_bound  = 3, lower_review_count = 1):
    """
    Input: two dataframes 
        Recommended vintages: which has the data on the specific wines chosen 
        All Vintages: which has data on all the wines
    Output:  one dataframe 
        all_filtered_winebottle: dataframe with vintage dataframe, of importance is the rating, price, year and producer name
    """

    #Defines a function for internal function use that grabs key-values pairs
    def extract_object_data(Object_data):
        """
        Extracts key-value pairs from an object and returns a Series
        """
        if isinstance(Object_data, dict):
            return pd.Series(Object_data)
        else:
            return pd.Series()  

    #Filters the vintages df so that the all the data is only gotten for those with a valid rating 
    all_vintages_df_True = all_vintages_df[all_vintages_df["has_valid_ratings"] == True]

    #Apply the function to the 'object_column' and create a new DataFrame
    new_columns = all_vintages_df_True['statistics'].apply(extract_object_data)

    #Concatenate the original DataFrame with the new columns
    all_vintages_df_True = pd.concat([all_vintages_df_True, new_columns], axis=1)
    
    #Grabs specific volumns from the vintage data 
    df4 = all_vintages_df_True[['id', 'name', 'year', 'ratings_average', 'reviews_count']]

    #Grabs the id and amount from the vintage data then creates a new dataframe 
    recommended_vintages_df['id'] =recommended_vintages_df['vintage'].apply(lambda x: x.get('id'))
    df5 =recommended_vintages_df[['id', 'type']].drop_duplicates(subset = ['id'])
    
    #Merges the two dataframes
    final_merge_df = pd.merge(df4, df5, on='id', how='left')

    #Filters the data by year, ratings average and reviews count 
    all_filtered_winebottle = final_merge_df[
        (final_merge_df['year'] >= lower_date_bound) &
        (final_merge_df['ratings_average'] > lower_rating_bound) &
        (final_merge_df['reviews_count'] > lower_review_count)
    ]

    #Change the columns names 
    all_filtered_winebottle = all_filtered_winebottle.rename(columns={'id': 'ID',
                                                                      'name': 'Producer_v',
                                                                      'year': 'Year_v',
                                                                      'ratings_average': 'Ratings Average',
                                                                      'reviews_count': 'Reviews Count',
                                                                      'type': 'Type',
                                                                      })
    
    def producer_name(row, popular_wines):
        """
        Checks if a row is in the list of producers, and returns the producer name 
        """
        list_of_producers = popular_wines['Producer'].unique()

        for producer in list_of_producers:

            lower_producer = producer.lower()
            lower_row = row.lower()

            if lower_producer in lower_row:

                return producer
            
        return row
    

    def wine_type(row, popular_wines):
        """
        Checks if a row is in the list of wines, and returns the WineType 
        """
        list_of_wines = popular_wines['WineType'].unique().tolist()

        list_of_wines.append(['château margaux', 'cabernet sauvignon', 'pinot noir', 'zinfandel', 'syrah', 
                            'pinot gris', 'sauvignon blanc', 'chardonnay', 'baco noir', 'bordeaux',
                            'malbec', 'chardonnay', 'pinot grigio', 'merlot', 'sangiovese', 'shiraz',
                            'cabernet franc', 'muscat', 'grenache', 'sangiovese'])
        
        for wine in list_of_wines:
           
            lower_wine = wine.lower()
            lower_row = row.lower()

            if lower_wine in lower_row:

                return wine
            
        return row
    
    # return all_filtered_winebottle['Producer_v']
    #Goes through the names column and changes the name to the producer name, to allow for later merging between dataframes
    all_filtered_winebottle['WineType'] = all_filtered_winebottle['Producer_v'].apply(lambda row: wine_type(row, popular_wines_df))
    all_filtered_winebottle['Producer_v'] = all_filtered_winebottle['Producer_v'].apply(lambda row: producer_name(row, popular_wines_df))

    #Add a producer/year column for future merging 
    all_filtered_winebottle['Producer/Year'] = all_filtered_winebottle.apply(lambda row: row['Producer_v'] + ' ' + str(row['Year_v']), axis=1)

    return all_filtered_winebottle

In [None]:
def Training_Data(weather_df, vintage_df):
    """
    Input: Weather dataframe indexed on the daily scale,
        Vintage dataframe which has yearly bottles from the same producer and wine type
    Output: Weather dataframe indexed based on a single row for a producer and year 
        columns will be monthly min, max or average for the respective data per year
    """
    #Weather_df.index = pd.to_datetime(weather_df.index)
    producers = weather_df['Producer'].unique()

    #Initializes a dataframe 
    final_df = pd.DataFrame()

    #Check to see if date is the index
    if type(weather_df.index[0]) == int:

        weather_df = weather_df.set_index('Date') 

    #Create a column for the year and produce all the unique years
    weather_df.index = pd.to_datetime(weather_df.index)
    weather_df['Year'] = weather_df.index.year
    unique_years = weather_df['Year'].unique()

    #Find all the unique months
    weather_df['Month'] = weather_df.index.month
    unique_months = weather_df['Month'].unique()

    #Gives key for the month names 
    month_names = {1 : 'January', 2 : 'February', 3 : 'March', 4 : 'April', 5 : 'May', 
                                  6 : 'June', 7 : 'July', 8 : 'August', 9 : 'September', 10 : 'October', 11 : 'November',
                                  12 : 'December'}

    for producer in producers:

        #Filter the data based on the producer
        producer_df = weather_df[weather_df['Producer'] == producer]

        #Iterate over each unique year
        for year in unique_years:
            
            #Filter the data for the specific year
            yearly_df = producer_df[producer_df['Year'] == year]

            #Creates initial data
            yearly_data = {'Producer' : producer, 'Year' : year}

            #Ierate over each unique month
            for month in unique_months: 

                #Filter the data for the specific month
                monthly_df = yearly_df[yearly_df['Month'] == month]

                #Group by month
                Grouped_Daily_into_Monthly = monthly_df.groupby(monthly_df.index.to_period('M'))

                #Change month number to month name
                month = month_names[month]

                yearly_data.update({
                    
                    'WineType' : Grouped_Daily_into_Monthly['Top WineType'].first()[0],
                    'District' : Grouped_Daily_into_Monthly['Top District'].first()[0],
                    f'{month} Max Temp (°C)' : Grouped_Daily_into_Monthly['Max Temp (°C)'].max().iloc[0],
                    f'{month} Min Temp (°C)' : Grouped_Daily_into_Monthly['Min Temp (°C)'].min().iloc[0],
                    f'{month} Avg Temp (°C)' : Grouped_Daily_into_Monthly['Avg Temp (°C)'].mean().iloc[0],
                    f'{month} Max Relative Humidity' : Grouped_Daily_into_Monthly['Max Relative Humidity'].max().iloc[0],
                    f'{month} Min Relative Humidity' : Grouped_Daily_into_Monthly['Min Relative Humidity'].min().iloc[0],
                    f'{month} Avg Relative Humidity' : Grouped_Daily_into_Monthly['Max Relative Humidity'].mean().iloc[0],
                    f'{month} Cumulative Rain (mm)' : Grouped_Daily_into_Monthly['Cumulative Rain (mm)'].sum().iloc[0],
                    f'{month} Cumulative Snow (mm)' : Grouped_Daily_into_Monthly['Cumulative Snow (mm)'].sum().iloc[0],
                    f'{month} Cumulative Precip (mm)' : Grouped_Daily_into_Monthly['Cumulative Rain (mm)'].sum().iloc[0] + Grouped_Daily_into_Monthly['Cumulative Snow (mm)'].sum().iloc[0],
                    f'{month} Cumulative Snow (mm)' : Grouped_Daily_into_Monthly['Cumulative Snow (mm)'].sum().iloc[0],
                    f'{month} Avg Cloud Cover (%)' : Grouped_Daily_into_Monthly['Avg Cloud Cover (%)'].mean().iloc[0],
                    f'{month} Max Wind Speed (Km/h)' : Grouped_Daily_into_Monthly['Max Wind Speed (Km/h)'].max().iloc[0],
                    f'{month} Min Wind Speed (Km/h)' : Grouped_Daily_into_Monthly['Min Wind Speed (Km/h)'].min().iloc[0],
                    f'{month} Avg Wind Speed (Km/h)' : Grouped_Daily_into_Monthly['Avg Wind Speed (Km/h)'].mean().iloc[0],
                    f'{month} Avg Daylight Hours' : Grouped_Daily_into_Monthly['Daylight Hours'].mean().iloc[0] 

                })
            
            #Adds this yearly data to original df
            final_df = pd.concat([final_df, pd.DataFrame([yearly_data])], ignore_index=True)
    
    #Add a producer/year column for future merging and reset index
    final_df = final_df.reset_index()
    final_df['Producer/Year'] = final_df.apply(lambda row: row['Producer'] + ' ' + str(row['Year']), axis=1)
    weather_df_columns = final_df.drop(columns = ['index'])

    #Combine the two dataframes
    final_df = pd.merge(final_df, vintage_df, on = 'Producer/Year', how = 'left')

    #Modify the dataframe for easier viewing
    final_df = final_df.drop(columns = ['index','Producer/Year', 'ID', 'Reviews Count', 'Producer_v', 'Year_v'])
    final_df.insert(0, 'Producer', final_df.pop('Producer'))
    final_df.insert(1, 'District', final_df.pop('District'))
    final_df.insert(2, 'Year', final_df.pop('Year'))
    final_df.insert(3, 'Ratings Average', final_df.pop('Ratings Average'))

    #Drop all the rows that have NAN in the ratings average column 
    final_df = final_df.dropna(subset = ['Ratings Average'])

    return final_df, weather_df_columns

In [None]:
def Training_Data_Model2(formatted_weather_data, vintage_df_M2, all_bottle_data):
    """
    
    """

    district_df = all_bottle_data[['Top District', 'WineType', 'Producer', 'Price']]

    int_df = pd.merge(vintage_df_M2, district_df, left_on = 'Producer_v', right_on = 'Producer')
    
    final_df = pd.DataFrame()
    final_df[['Producer', 'District', 'Year', 'Rating', 'Price', 'Type']] = int_df[['Producer_v', 'Top District', 'Year_v', 'Ratings Average', 'Price', 'Type']]
    # formatted_weather_data.drop(['Producer', ['']])

    # final_df_M2 = pd.merge(formatted_weather_data, vintage_df_M2, on = 'Producer/Year', how = 'left')


    return final_df

---

Variable Section 4 
---

In [6]:
final_df_file_path = r'C:\Users\fwhal\Downloads\CME528\Project\Repo-2\BreakinBadCode\Final_DataFrames'

FINAL_wine_df_FINAL = pd.read_csv(os.path.join(final_df_file_path, 'FINAL_wine_df_FINAL.csv'))
FINAL_weather_data_FINAL = pd.read_csv(os.path.join(final_df_file_path, 'FINAL_weather_data_FINAL.csv'))
FINAL_wine_df_filtered_1Bottle_FINAL = pd.read_csv(os.path.join(final_df_file_path, 'FINAL_wine_df_filtered_1Bottle_FINAL.csv'))
FINAL_wine_df_filtered_All_Bottles_FINAL = pd.read_csv(os.path.join(final_df_file_path, 'FINAL_wine_df_filtered_All_Bottles_FINAL.csv'))

In [55]:
recommended_vintages_M1_df, all_vintages_M1_df = Vintage_Dataframe(FINAL_wine_df_filtered_1Bottle_FINAL)
vintage_df_M1_final = Final_Vintage_Dataframe(recommended_vintages_M1_df, all_vintages_M1_df, FINAL_wine_df_FINAL, lower_date_bound = 1900, lower_rating_bound  = 3, lower_review_count = 1)

FINAL_training_data_Model1_FINAL, Columned_Weather_Data = Training_Data(FINAL_weather_data_FINAL, vintage_df_M1_final)
FINAL_training_data_Model1_FINAL.to_csv(os.path.join(final_df_file_path, 'FINAL_training_data_Model1_FINAL.csv'), index=False)

            id                                           seo_name  \
9     31188928  fisher-vineyards-wedding-vineyard-cabernet-sau...   
11    14441634  fisher-vineyards-wedding-vineyard-cabernet-sau...   
12     5250853  fisher-vineyards-wedding-vineyard-cabernet-sau...   
13     3988737  fisher-vineyards-wedding-vineyard-cabernet-sau...   
16     4191645  fisher-vineyards-wedding-vineyard-cabernet-sau...   
...        ...                                                ...   
1471   2801016         quinta-do-vallado-reserva-field-blend-2004   
1472  18456347         quinta-do-vallado-reserva-field-blend-2003   
1475   3305001         quinta-do-vallado-reserva-field-blend-2000   
1476   2840163         quinta-do-vallado-reserva-field-blend-1999   
1502   2003077           quinta-do-vallado-reserva-field-blend-uv   

                                                   name  \
9     Fisher Vineyards Wedding Vineyard Cabernet Sau...   
11    Fisher Vineyards Wedding Vineyard Cabernet Sau.

  Grouped_Daily_into_Monthly = monthly_df.groupby(monthly_df.index.to_period('M'))
  'WineType' : Grouped_Daily_into_Monthly['Top WineType'].first()[0],
  'District' : Grouped_Daily_into_Monthly['Top District'].first()[0],


In [103]:
all_vintages_M1_df

Unnamed: 0,id,seo_name,name,statistics,year,grapes,has_valid_ratings,source_url
0,176133729,fisher-vineyards-wedding-vineyard-cabernet-sau...,Fisher Vineyards Wedding Vineyard Cabernet Sau...,"{'status': 'BelowThreshold', 'ratings_count': ...",2022,,False,https://www.vivino.com/fisher-vineyards-weddin...
1,178542599,fisher-vineyards-wedding-vineyard-cabernet-sau...,Fisher Vineyards Wedding Vineyard Cabernet Sau...,"{'status': 'BelowThreshold', 'ratings_count': ...",2021,,False,https://www.vivino.com/fisher-vineyards-weddin...
2,164849086,fisher-vineyards-wedding-vineyard-cabernet-sau...,Fisher Vineyards Wedding Vineyard Cabernet Sau...,"{'status': 'BelowThreshold', 'ratings_count': ...",2020,,False,https://www.vivino.com/fisher-vineyards-weddin...
3,160198089,fisher-vineyards-wedding-vineyard-cabernet-sau...,Fisher Vineyards Wedding Vineyard Cabernet Sau...,"{'status': 'BelowThreshold', 'ratings_count': ...",2019,,False,https://www.vivino.com/fisher-vineyards-weddin...
4,169889209,fisher-vineyards-wedding-vineyard-cabernet-sau...,Fisher Vineyards Wedding Vineyard Cabernet Sau...,"{'status': 'BelowThreshold', 'ratings_count': ...",2018,,False,https://www.vivino.com/fisher-vineyards-weddin...
...,...,...,...,...,...,...,...,...
1498,81658908,quinta-do-vallado-reserva-field-blend-1865,Quinta do Vallado Reserva (Field Blend) 1865,"{'status': 'BelowThreshold', 'ratings_count': ...",1865,,False,https://www.vivino.com/quinta-do-vallado-reser...
1499,9330686,quinta-do-vallado-reserva-field-blend-1864,Quinta do Vallado Reserva (Field Blend) 1864,"{'status': 'BelowThreshold', 'ratings_count': ...",1864,,False,https://www.vivino.com/quinta-do-vallado-reser...
1500,22144953,quinta-do-vallado-reserva-field-blend-1842,Quinta do Vallado Reserva (Field Blend) 1842,"{'status': 'BelowThreshold', 'ratings_count': ...",1842,,False,https://www.vivino.com/quinta-do-vallado-reser...
1501,55224146,quinta-do-vallado-reserva-field-blend-1790,Quinta do Vallado Reserva (Field Blend) 1790,"{'status': 'BelowThreshold', 'ratings_count': ...",1790,,False,https://www.vivino.com/quinta-do-vallado-reser...


In [107]:
vintage_df_M1_final = Final_Vintage_Dataframe(recommended_vintages_M1_df, all_vintages_M1_df, FINAL_wine_df_FINAL, lower_date_bound = 1900, lower_rating_bound  = 3, lower_review_count = 1)

In [108]:
FINAL_wine_df_filtered_All_Bottles_FINAL.head()

Unnamed: 0.1,Unnamed: 0,Country Count,Top Region,Region Count,Top District,WineType,Producer,Rating,Price,URL,Lat,Long
0,0,6705,California,2496,Sonoma,acaibo,Trinite Estate,4.15,100.8,https://www.vivino.com/trinite-estate-acaibo/w...,38.51108,-122.847339
1,1,6705,California,2496,Sonoma,angeli (estate grown),Marietta,4.0,62.99,https://www.vivino.com/marietta-cellars-game-t...,38.51108,-122.847339
2,2,6705,California,2496,Sonoma,armé (estate grown),Marietta,4.0,62.99,https://www.vivino.com/marietta-cellars-game-t...,38.51108,-122.847339
3,3,6705,California,2496,Sonoma,christo (estate grown),Marietta,3.9,62.99,https://www.vivino.com/marietta-cellars-game-t...,38.51108,-122.847339
4,4,6705,California,2496,Sonoma,zinfandel,Marietta,4.0,62.99,https://www.vivino.com/marietta-cellars-game-t...,38.51108,-122.847339


In [None]:
# recommended_vintages_M2_df, all_vintages_M2_df = Vintage_Dataframe(FINAL_wine_df_filtered_All_Bottles_FINAL)
# vintage_df_M2_final = Final_Vintage_Dataframe(recommended_vintages_M2_df, all_vintages_M2_df, FINAL_wine_df_FINAL, lower_date_bound = 1900, lower_rating_bound  = 3, lower_review_count = 1)
#
# FINAL_training_data_Model2_FINAL = Training_Data_Model2(Columned_Weather_Data, vintage_df_M2_final, FINAL_wine_df_filtered_All_Bottles_FINAL)
# FINAL_training_data_Model2_FINAL.to_csv(os.path.join(final_df_file_path, 'FINAL_training_data_Model2_FINAL.csv'), index=False)

An error occurred for URL: https://www.vivino.com/conde-de-los-andes-rioja/w/1142303 - ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response'))
An error occurred for URL: https://www.vivino.com/ysios-rioja-rioja/w/1204299 - HTTPSConnectionPool(host='www.vivino.com', port=443): Max retries exceeded with url: /ysios-rioja-rioja/w/1204299 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x00000141C8EF8310>: Failed to resolve 'www.vivino.com' ([Errno 11001] getaddrinfo failed)"))
An error occurred for URL: https://www.vivino.com/bodegas-franco-espanolas-rioja-bordon-gran-reserva/w/2628154 - HTTPSConnectionPool(host='www.vivino.com', port=443): Max retries exceeded with url: /bodegas-franco-espanolas-rioja-bordon-gran-reserva/w/2628154 (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x00000141C8F2F590>: Failed to resolve 'www.vivino.com' ([Errno 11001] getaddrinfo failed)"))
An error occurred for 

In [48]:
vintage_df_M2_final.head(30)

Unnamed: 0,ID,Producer_v,Year_v,Ratings Average,Reviews Count,Type,Producer/Year
0,159130136,Tate,2018,4.1,11,,Tate 2018
1,145104177,Tate,2016,4.1,28,top_ranked,Tate 2016
2,145104175,Tate,2015,4.1,30,,Tate 2015
3,141689264,Tate,2014,4.2,79,best_user_rated,Tate 2014
4,141689259,Tate,2013,4.1,70,most_user_rated,Tate 2013
5,18696276,Tate,2012,4.0,41,,Tate 2012
7,174235274,Arietta,2021,4.2,18,,Arietta 2021
8,168009128,Arietta,2019,4.2,32,best_user_rated,Arietta 2019
9,158341198,Arietta,2018,4.1,45,top_ranked,Arietta 2018
10,163152183,Arietta,2017,3.9,31,,Arietta 2017


In [49]:
FINAL_wine_df_filtered_All_Bottles_FINAL.head(5)

Unnamed: 0.1,Unnamed: 0,Country Count,Top Region,Region Count,Top District,WineType,Producer,Rating,Price,URL,Lat,Long
0,0,6705,California,2496,Sonoma,acaibo,Trinite Estate,4.15,100.8,https://www.vivino.com/trinite-estate-acaibo/w...,38.51108,-122.847339
1,1,6705,California,2496,Sonoma,angeli (estate grown),Marietta,4.0,62.99,https://www.vivino.com/marietta-cellars-game-t...,38.51108,-122.847339
2,2,6705,California,2496,Sonoma,armé (estate grown),Marietta,4.0,62.99,https://www.vivino.com/marietta-cellars-game-t...,38.51108,-122.847339
3,3,6705,California,2496,Sonoma,christo (estate grown),Marietta,3.9,62.99,https://www.vivino.com/marietta-cellars-game-t...,38.51108,-122.847339
4,4,6705,California,2496,Sonoma,zinfandel,Marietta,4.0,62.99,https://www.vivino.com/marietta-cellars-game-t...,38.51108,-122.847339


In [44]:
FINAL_training_data_Model2_FINAL = Training_Data_Model2(Columned_Weather_Data, vintage_df_M2_final, FINAL_wine_df_filtered_All_Bottles_FINAL)
# FINAL_training_data_Model2_FINAL.to_csv(os.path.join(final_df_file_path, 'FINAL_training_data_Model2_FINAL.csv'), index=False)

In [47]:
FINAL_training_data_Model2_FINAL.head(30)

Unnamed: 0,Producer,District,Year,Rating,Price,Type
0,Tate,Napa,2018,4.1,165.31,
1,Tate,Napa,2016,4.1,165.31,top_ranked
2,Tate,Napa,2015,4.1,165.31,
3,Tate,Napa,2014,4.2,165.31,best_user_rated
4,Tate,Napa,2013,4.1,165.31,most_user_rated
5,Tate,Napa,2012,4.0,165.31,
6,Tate,Napa,2019,4.4,165.31,top_listed
7,Tate,Napa,2018,4.3,165.31,
8,Tate,Napa,2017,4.0,165.31,
9,Tate,Napa,2016,4.3,165.31,


In [28]:
vintage_df_M2_final.head(5)

Unnamed: 0,ID,Producer_v,Year_v,Ratings Average,Reviews Count,Type,Producer/Year
0,159130136,Tate,2018,4.1,11,,Tate 2018
1,145104177,Tate,2016,4.1,28,top_ranked,Tate 2016
2,145104175,Tate,2015,4.1,30,,Tate 2015
3,141689264,Tate,2014,4.2,79,best_user_rated,Tate 2014
4,141689259,Tate,2013,4.1,70,most_user_rated,Tate 2013


In [None]:
vintage_df_M2_final.to_csv(os.path.join(final_df_file_path, 'vintage_df_M2_final.csv'), index = True)

In [20]:
Columned_Weather_Data

Unnamed: 0,index,Producer,Year,WineType,District,January Max Temp (°C),January Min Temp (°C),January Avg Temp (°C),January Max Relative Humidity,January Min Relative Humidity,...,December Avg Relative Humidity,December Cumulative Rain (mm),December Cumulative Snow (mm),December Cumulative Precip (mm),December Avg Cloud Cover (%),December Max Wind Speed (Km/h),December Min Wind Speed (Km/h),December Avg Wind Speed (Km/h),December Avg Daylight Hours,Producer/Year
0,0,Fisher Vineyards,2000,cabernet sauvignon,Sonoma,15.929500,1.0295,9.091798,100.0,33.359756,...,92.596636,17.900000,0.00,17.900000,63.418011,14.494192,0.360000,5.201014,9.530056,Fisher Vineyards 2000
1,1,Fisher Vineyards,2001,cabernet sauvignon,Sonoma,19.029500,-1.5705,7.234540,100.0,30.501522,...,94.713735,247.700000,0.00,247.700000,78.353494,27.026386,0.000000,8.259984,9.531516,Fisher Vineyards 2001
2,2,Fisher Vineyards,2002,cabernet sauvignon,Sonoma,17.429500,-1.5705,7.662296,100.0,30.890038,...,96.753208,390.500000,0.77,391.270000,72.919354,36.258274,0.360000,9.049175,9.532616,Fisher Vineyards 2002
3,3,Fisher Vineyards,2003,cabernet sauvignon,Sonoma,19.929500,1.8795,11.077349,100.0,32.727215,...,96.938685,248.899999,0.00,248.899999,75.965053,27.959742,0.509117,8.032801,9.533591,Fisher Vineyards 2003
4,4,Fisher Vineyards,2004,cabernet sauvignon,Sonoma,16.579498,-0.8705,8.426812,100.0,35.712963,...,95.845905,233.400000,0.00,233.400000,56.950268,25.942488,0.360000,6.039130,9.529678,Fisher Vineyards 2004
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571,571,Quinta do Vallado,2019,superior (organic vineyards),Freixo de Espada a Cinta,16.488500,-3.9115,5.454024,100.0,30.749796,...,95.350414,145.600000,0.00,145.600000,66.778226,34.454840,0.000000,7.201181,9.267922,Quinta do Vallado 2019
572,572,Quinta do Vallado,2020,superior (organic vineyards),Freixo de Espada a Cinta,17.688501,-2.4615,6.816322,100.0,35.641990,...,96.307585,60.400000,0.00,60.400000,64.638441,18.844202,0.360000,7.005120,9.263821,Quinta do Vallado 2020
573,573,Quinta do Vallado,2021,superior (organic vineyards),Freixo de Espada a Cinta,20.038500,-3.5615,6.512895,100.0,38.647420,...,96.930360,53.500000,0.00,53.500000,70.969086,23.333443,0.000000,6.326624,9.264957,Quinta do Vallado 2021
574,574,Quinta do Vallado,2022,superior (organic vineyards),Freixo de Espada a Cinta,20.588501,-2.0115,6.299387,100.0,22.319340,...,95.972280,191.900002,0.00,191.900002,82.533603,21.422270,0.000000,7.230849,9.265957,Quinta do Vallado 2022


---