analysis file

In [1]:
import pandas as pd
import altair as alt

In [2]:
books = pd.read_csv('data/processed_data.csv')

In [3]:
books.head()

Unnamed: 0,title,author_name,year,rating,ratings
0,Magic Slays,Ilona Andrews,2011,4.4,70852
1,Silver Borne,Patricia Briggs,2010,4.38,102351
2,Perfect,Judith McNaught,1993,4.3,23473
3,The Will,Kristen Ashley,2014,4.29,29046
4,Immortal in Death,J.D. Robb,1996,4.28,51583


In [None]:
books['decade'] = (books['year'] // 10) * 10


Unnamed: 0,title,author_name,year,rating,ratings,decade
0,Magic Slays,Ilona Andrews,2011,4.4,70852,2010
1,Silver Borne,Patricia Briggs,2010,4.38,102351,2010
2,Perfect,Judith McNaught,1993,4.3,23473,1990
3,The Will,Kristen Ashley,2014,4.29,29046,2010
4,Immortal in Death,J.D. Robb,1996,4.28,51583,1990


In [6]:
decade_counts = books.groupby('decade').size().reset_index(name='count')
chart = alt.Chart(decade_counts).mark_arc().encode(
    theta=alt.Theta('count:Q', stack=True),
    color=alt.Color('decade:N', legend=alt.Legend(title='Decade')),
    tooltip=['decade:N', 'count:Q']
).properties(
    title='Proportion of Romance Novels released by Decade',
    width=400,
    height=400
)

chart.show()

In [9]:
books.head(10)

Unnamed: 0,title,author_name,year,rating,ratings,decade
0,Magic Slays,Ilona Andrews,2011,4.4,70852,2010
1,Silver Borne,Patricia Briggs,2010,4.38,102351,2010
2,Perfect,Judith McNaught,1993,4.3,23473,1990
3,The Will,Kristen Ashley,2014,4.29,29046,2010
4,Immortal in Death,J.D. Robb,1996,4.28,51583,1990
5,The Sweet Gum Tree,Katherine Allred,2005,4.27,29341,2000
6,Hunting Ground,Patricia Briggs,2009,4.27,66592,2000
7,Driven,K. Bromberg,2013,4.26,59370,2010
8,Play of Passion,Nalini Singh,2010,4.23,27951,2010
9,Never Love a Highlander,Maya Banks,2011,4.21,25853,2010


In [10]:
author_ratings = books.groupby('author_name')['ratings'].sum().reset_index()
top_10_authors = author_ratings.nlargest(10, 'ratings')
chart = alt.Chart(top_10_authors).mark_bar().encode(
    x=alt.X('ratings:Q', title='Total Number of Ratings'),
    y=alt.Y('author_name:N', sort='-x', title='Author'),
    tooltip=['author_name:N', 'ratings:Q']
).properties(
    title='Top 10 Most-Rated Authors',
    width=600,
    height=400
)

chart.show()

In [None]:


yearly_counts = books.groupby('year').size().reset_index(name='count')

chart = alt.Chart(yearly_counts).mark_line(
    color='steelblue',
    strokeWidth=3,
    point=alt.OverlayMarkDef(color='steelblue', size=50)
).encode(
    x=alt.X('year:O',
            title='Year',
            axis=alt.Axis(labelAngle=-45)),
    y=alt.Y('count:Q',
            title='Number of Romance Books Published'),
    tooltip=[
        alt.Tooltip('year:O', title='Year'),
        alt.Tooltip('count:Q', title='Romance Books Published')
    ]
).properties(
    title={
        "text": "Romance Books Published Over Time",
        "fontSize": 18,
        "font": "Arial",
        "fontWeight": "bold"
    },
    width=700,
    height=400
).configure_view(
    strokeWidth=0
).configure_axis(
    grid=True,
    gridOpacity=0.3
)

chart.show()