### Load the Vine Table CSV file into a DataFrame

In [1]:
# Import your dependencies.
import pandas as pd

In [2]:
# Load the Vine review table from "vine_table.csv" into a DataFrame.
# The path is relative, so the file must be reachable from the notebook's
# working directory.
vine_df = pd.read_csv("vine_table.csv")
# Show the loaded DataFrame as the cell output.
vine_df

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
0,RTIS3L2M1F5SM,5,0,0,N,Y
1,R1ZV7R40OLHKD,5,0,0,N,Y
2,R3BH071QLH8QMC,1,0,1,N,Y
3,R127K9NTSXA2YH,3,0,0,N,Y
4,R32ZWUXDJPW27Q,4,0,0,N,Y
...,...,...,...,...,...,...
1785992,RPC430LWZJ60T,5,1,1,N,N
1785993,R347MZT5FH6HRJ,5,2,2,N,N
1785994,RJ5BETZP0VIUS,1,0,2,N,N
1785995,R85QTDO2KZMGO,1,1,3,N,N


In [3]:
# Inspect the dtype pandas inferred for each column; the later filters compare
# total_votes, helpful_votes and star_rating numerically.
vine_df.dtypes

review_id            object
star_rating           int64
helpful_votes         int64
total_votes           int64
vine                 object
verified_purchase    object
dtype: object

### Filter by votes

In [4]:
# Keep only the reviews that received at least 20 total votes.
df1 = vine_df[vine_df["total_votes"].ge(20)]
df1

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
55,R4PKAZRQJJX14,1,21,34,N,N
74,R2CI0Y288CC7E2,1,21,35,N,Y
209,R127WEQY2FM1T3,1,147,175,N,Y
289,R3EZ0EPYLDA34S,1,14,31,N,Y
483,R2FJ94555FZH32,2,55,60,N,N
...,...,...,...,...,...,...
1785688,R3GYUWLD9FWCPS,5,30,30,N,N
1785714,R26KS4Q9G04FIV,2,8,21,N,N
1785788,RU0J1ZMBCLD27,5,27,29,N,N
1785806,R1D69XH2THYKTG,5,18,20,N,N


In [5]:
# Keep only the reviews where at least half of the votes were helpful,
# i.e. helpful_votes / total_votes >= 0.5.
df2 = df1[df1["helpful_votes"].div(df1["total_votes"]).ge(0.5)]
df2

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
55,R4PKAZRQJJX14,1,21,34,N,N
74,R2CI0Y288CC7E2,1,21,35,N,Y
209,R127WEQY2FM1T3,1,147,175,N,Y
483,R2FJ94555FZH32,2,55,60,N,N
537,R1U3AR67RE273L,1,51,65,N,Y
...,...,...,...,...,...,...
1785641,RCMDCDJR16IKW,4,27,30,N,N
1785688,R3GYUWLD9FWCPS,5,30,30,N,N
1785788,RU0J1ZMBCLD27,5,27,29,N,N
1785806,R1D69XH2THYKTG,5,18,20,N,N


### Analyze the Vine Reviews

In [6]:
# Reviews written as part of the Vine program (vine == "Y").
paid_df = df2[df2["vine"].eq("Y")]
paid_df

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
32611,R3KKUSGFZWSUIY,5,56,63,Y,N
33112,R10FO5UKKVZBK2,3,23,23,Y,N
69680,RM4KSGEOR7MU1,5,19,24,Y,N
155361,RG7VRMYLEXD23,4,22,26,Y,N
239327,R11O4YSCPSNL6L,3,20,26,Y,N
...,...,...,...,...,...,...
1456862,RLPTVGLU0JQIP,3,42,45,Y,N
1463333,R3ASJ9SENYYYI0,5,40,46,Y,N
1481162,RNU8PK609WT6P,4,347,362,Y,N
1506354,R8YT75NJW0CM9,4,37,40,Y,N


In [7]:
# Reviews that were not written as part of the Vine program (vine == "N").
unpaid_df = df2[df2["vine"].eq("N")]
unpaid_df

Unnamed: 0,review_id,star_rating,helpful_votes,total_votes,vine,verified_purchase
55,R4PKAZRQJJX14,1,21,34,N,N
74,R2CI0Y288CC7E2,1,21,35,N,Y
209,R127WEQY2FM1T3,1,147,175,N,Y
483,R2FJ94555FZH32,2,55,60,N,N
537,R1U3AR67RE273L,1,51,65,N,Y
...,...,...,...,...,...,...
1785641,RCMDCDJR16IKW,4,27,30,N,N
1785688,R3GYUWLD9FWCPS,5,30,30,N,N
1785788,RU0J1ZMBCLD27,5,27,29,N,N
1785806,R1D69XH2THYKTG,5,18,20,N,N


### Determine the percentage of five-star reviews among Vine reviews

In [8]:
# Count the 5-star ratings among the Vine (paid) reviews.
# Rows and column are selected in a single .loc call instead of chained indexing.
paid_five_star_number = paid_df.loc[paid_df["star_rating"] == 5, "star_rating"].count()

# Count all star ratings among the Vine (paid) reviews.
paid_number = paid_df["star_rating"].count()

# Percentage of Vine reviews that are 5-star.
percentage_five_star_vine = paid_five_star_number / paid_number * 100

# Print, in order: total Vine reviews, 5-star Vine reviews, 5-star percentage.
print(paid_number)
print(paid_five_star_number)
print(percentage_five_star_vine)

94
48
51.06382978723404


### Determine the percentage of five-star reviews among non-Vine reviews

In [9]:
# Count the 5-star ratings among the non-Vine (unpaid) reviews.
# Rows and column are selected in a single .loc call instead of chained indexing.
unpaid_five_star_number = unpaid_df.loc[unpaid_df["star_rating"] == 5, "star_rating"].count()

# Count all star ratings among the non-Vine (unpaid) reviews.
unpaid_number = unpaid_df["star_rating"].count()

# Percentage of non-Vine reviews that are 5-star.
percentage_five_star_non_vine = unpaid_five_star_number / unpaid_number * 100

# Print, in order: total non-Vine reviews, 5-star non-Vine reviews, 5-star percentage.
print(unpaid_number)
print(unpaid_five_star_number)
print(percentage_five_star_non_vine)

40471
15663
38.701786464381904
