# From SQL to pandas challenge 7

In [1]:
# import libraries
import pandas as pd

# Load data
# The CSV files live on Google Drive; the helper below builds their download URLs.
def gd_path(file_id):
    """Build the Google Drive direct-download URL for a file id.

    Parameters
    ----------
    file_id
        Identifier of the file on Google Drive; it is interpolated into the
        URL as-is (no escaping is applied).

    Returns
    -------
    str
        A ``https://drive.google.com/uc?export=download&id=...`` URL.
    """
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    return download_url

# Google Drive file id of each CSV, keyed by table name
files_id = dict(
    titles="1PLdn50N9GRa53ZbuVWo0l47F_IXdvlEm",
    sales="1fzFc9rwYmVIPaGOFmhLVxCi3kg19vNU2",
    employee="1h9mUjsVqpP74b1w0x7KOw37n_n9Ulkt5",
    authors="1fEF89Nhe61EebAljKlwFwfEuokK0o6aJ",
)

# Download every table into its own DataFrame; the files are semicolon-separated.
# The tuple order below matches the order of the names being assigned.
authors, titles, sales, employee = (
    pd.read_csv(gd_path(files_id[table]), sep=";")
    for table in ("authors", "titles", "sales", "employee")
)

## 1.&nbsp;Find the total number of authors for each state

In [3]:
# Count the rows of `authors` per distinct value of the `state` column.
authors.value_counts(subset='state')

state
CA    15
UT     2
IN     1
KS     1
MD     1
MI     1
OR     1
TN     1
dtype: int64

In [4]:
# Same question via groupby: count the non-null `au_id` values within each state
# (the pandas counterpart of SQL's COUNT(au_id) ... GROUP BY state).
authors.groupby('state')['au_id'].count()

state
CA    15
IN     1
KS     1
MD     1
MI     1
OR     1
TN     1
UT     2
Name: au_id, dtype: int64

### Hint:

In SQL the syntax is:

```sql
SELECT COUNT(au_id) , state
FROM authors
group by state;
```

## 2.&nbsp;Find the total number of authors in each state and sort the states by that count in descending order

In [5]:
# Count authors per state, then order the counts from largest to smallest.
(
    authors
    .groupby('state')['au_id']
    .count()
    .sort_values(ascending=False)
)

state
CA    15
UT     2
IN     1
KS     1
MD     1
MI     1
OR     1
TN     1
Name: au_id, dtype: int64

### Hint:

In SQL the syntax is:

```sql
SELECT COUNT(au_id) Total,state
FROM authors
GROUP BY state
ORDER BY (total) DESC;
```

## 3.&nbsp;What's the price of the most expensive title from each publisher?

In [7]:
# Highest `price` among the titles of each publisher (`pub_id`).
titles.groupby('pub_id')['price'].max()

pub_id
736     19.99
877     21.59
1389    22.95
Name: price, dtype: float64

### Hint:

In SQL the syntax is:

```sql
SELECT MAX(price), pub_id
FROM titles
GROUP BY pub_id;
```

## 4.&nbsp;Find out the top 3 stores with the most sales

In [11]:
# Total quantity sold per store, ordered from largest to smallest; keep the first three rows.
(
    sales
    .groupby('stor_id')['qty']
    .sum()
    .sort_values(ascending=False)
    .head(3)
)

stor_id
7131    130
7066    125
7067     90
Name: qty, dtype: int64

### Hint:

In SQL the syntax is:

```sql
SELECT stor_id, SUM(qty)
FROM sales
GROUP BY stor_id
ORDER BY SUM(qty) DESC
LIMIT 3;
```

## 5.&nbsp;Find the average job level for each job_id from the employee table.

Order the jobs in ascending order by their average job level.

In [13]:
# Mean `job_lvl` per `job_id`, ordered from the lowest average to the highest.
(
    employee
    .groupby('job_id')['job_lvl']
    .mean()
    .sort_values(ascending=True)
)

job_id
12     55.666667
13     66.333333
14     74.666667
10    101.750000
11    121.750000
9     123.250000
8     125.250000
7     142.500000
6     176.000000
5     187.285714
3     200.000000
2     215.000000
4     227.000000
Name: job_lvl, dtype: float64

### Hint:

In SQL the syntax is:

```sql
SELECT AVG(job_lvl), job_id
FROM employee
GROUP BY job_id
ORDER BY AVG(job_lvl);
```

## 6.&nbsp;For each type (business, psychology…), find out how many books each publisher has

In [15]:
# Number of non-null `title_id` values for every (publisher, book type) pair.
titles.groupby(['pub_id', 'type'])['title_id'].count()

pub_id  type        
736     business        1
        psychology      4
877     UNDECIDED       1
        mod_cook        2
        psychology      1
        trad_cook       3
1389    business        3
        popular_comp    3
Name: title_id, dtype: int64

### Hint:

In SQL the syntax is:

```sql
SELECT pub_id, type, COUNT(title_id)
FROM titles
GROUP BY pub_id , type;
```

## 7.&nbsp;Add the average price of each publisher - book type combination from your previous query

In [16]:
# Per (publisher, book type): how many titles there are and their mean price.
# Named aggregation is used with the source column names as output names,
# so the result keeps the columns `title_id` and `price`.
(
    titles
    .groupby(['pub_id', 'type'])
    .agg(
        title_id=('title_id', 'count'),
        price=('price', 'mean'),
    )
)

Unnamed: 0_level_0,Unnamed: 1_level_0,title_id,price
pub_id,type,Unnamed: 2_level_1,Unnamed: 3_level_1
736,business,1,2.99
736,psychology,4,11.4825
877,UNDECIDED,1,0.0
877,mod_cook,2,11.49
877,psychology,1,21.59
877,trad_cook,3,15.963333
1389,business,3,17.31
1389,popular_comp,3,14.316667


### Hint:

In SQL the syntax is:

```sql
SELECT  AVG(price), COUNT(*), pub_id,type
FROM titles
GROUP BY pub_id, type;
```

# BONUS

This is part of the section Advanced Pandas.

## 8.&nbsp;From your previous query, keep only the combinations of publisher - book type with an average price higher than 12

In [34]:
# pandas counterpart of SQL's HAVING: aggregate per (publisher, book type) first,
# then keep only the groups whose aggregated mean price exceeds 12.
(
    titles
    .groupby(['pub_id', 'type'])
    .agg(
        count=('title_id', 'count'),
        avg_price=('price', 'mean'),
    )
    .loc[lambda stats: stats['avg_price'] > 12]
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,avg_price
pub_id,type,Unnamed: 2_level_1,Unnamed: 3_level_1
877,psychology,1,21.59
877,trad_cook,3,15.963333
1389,business,3,17.31
1389,popular_comp,3,14.316667


### Hint:

In SQL the syntax is:

```sql
SELECT pub_id,type, COUNT(*)
FROM titles
GROUP BY pub_id, type
HAVING AVG(price) > 12;
```

## 9.&nbsp;Order the results of your previous query by these two criteria:
1. Count of books, descending
2. Average price, descending

In [27]:
# Same aggregation and filter as the previous exercise, then order the surviving
# groups by book count and, within equal counts, by mean price (both descending).
(
    titles
    .groupby(['pub_id', 'type'])
    .agg(
        count=('title_id', 'count'),
        avg_price=('price', 'mean'),
    )
    .loc[lambda stats: stats['avg_price'] > 12]
    .sort_values(by=['count', 'avg_price'], ascending=False)
)

Unnamed: 0_level_0,Unnamed: 1_level_0,count,avg_price
pub_id,type,Unnamed: 2_level_1,Unnamed: 3_level_1
1389,business,3,17.31
877,trad_cook,3,15.963333
1389,popular_comp,3,14.316667
877,psychology,1,21.59


### Hint:

In SQL the syntax is:

```sql
SELECT  pub_id, type, COUNT(*), AVG(price)
FROM titles
GROUP BY pub_id, type
HAVING AVG(price) > 12
ORDER BY COUNT(*) DESC, AVG(price) DESC;
```

## 10.&nbsp;Some authors have a contract, while others don't - it's indicated in the "contract" column of the authors table.

Select all the states and cities where there are 2 or more contracts overall.

In [33]:
# Sum the `contract` column per (city, state) and keep the pairs
# whose total is at least 2.
(
    authors
    .groupby(['city', 'state'])
    .agg(contracts=('contract', 'sum'))
    .loc[lambda totals: totals['contracts'] >= 2]
)

Unnamed: 0_level_0,Unnamed: 1_level_0,contracts
city,state,Unnamed: 2_level_1
Berkeley,CA,2
Oakland,CA,4
Palo Alto,CA,2
Salt Lake City,UT,2


### Hint:

In SQL the syntax is:

```sql
SELECT SUM(contract),city,state
FROM authors
GROUP BY state,city
HAVING SUM(contract)>1;
```