# From SQL to pandas challenge 6

In [1]:
# import libraries
import pandas as pd

# load data
# The helper below builds download URLs for our data files stored on Google Drive
def gd_path(file_id):
    """Build the direct-download URL for a Google Drive file.

    Args:
        file_id: The Google Drive file id to embed in the URL.

    Returns:
        A ``drive.google.com`` URL string with ``export=download`` set and
        ``file_id`` as the ``id`` query parameter.
    """
    download_url = f"https://drive.google.com/uc?export=download&id={file_id}"
    return download_url

# Table name -> Google Drive file id of the CSV holding that table
files_id = dict(
    titles="1PLdn50N9GRa53ZbuVWo0l47F_IXdvlEm",
    stores="1f-GCgip7O93CpbAkYvOsc21eKnSOSHsQ",
    sales="1fzFc9rwYmVIPaGOFmhLVxCi3kg19vNU2",
    jobs="1V1Za8hUdXD-vJOyRdX4aQV5wanIff2eM",
    authors="1fEF89Nhe61EebAljKlwFwfEuokK0o6aJ",
)

# Download each table from Google Drive; every file is read with ";" as the separator
def _read_table(table_name):
    """Read the CSV registered under ``table_name`` in ``files_id`` into a DataFrame."""
    return pd.read_csv(gd_path(files_id[table_name]), sep=";")

stores = _read_table("stores")
titles = _read_table("titles")
authors = _read_table("authors")
sales = _read_table("sales")
jobs = _read_table("jobs")

## 1.&nbsp;Select the name, address and state of all stores located in either California (CA) or Oregon (OR)

In [2]:
# Row mask: stores whose state is CA or OR (pandas equivalent of SQL `IN`)
in_ca_or_or = stores["state"].isin(["CA", "OR"])
stores.loc[in_ca_or_or, ["stor_name", "stor_address", "state"]]

Unnamed: 0,stor_name,stor_address,state
1,Barnum's,567 Pasadena Ave.,CA
2,News & Brews,577 First St.,CA
4,Fricative Bookshop,89 Madison St.,CA
5,Bookbeat,679 Carson St.,OR


### Hint:

In SQL the syntax is:

```sql
SELECT stor_name,stor_address,state  
FROM stores  
WHERE state IN("CA","OR");
```

## 2.&nbsp;Using `.isin()`, select all titles of type "psychology", "mod_cook" and "trad_cook"

In [3]:
# Keep only the titles whose type is one of the requested categories
wanted_types = ["psychology", "mod_cook", "trad_cook"]
titles.loc[titles["type"].isin(wanted_types), ["title", "type"]]

Unnamed: 0,title,type
4,Silicon Valley Gastronomic Treats,mod_cook
5,The Gourmet Microwave,mod_cook
10,Computer Phobic AND Non-Phobic Individuals: Be...,psychology
11,Is Anger the Enemy?,psychology
12,Life Without Fear,psychology
13,Prolonged Data Deprivation: Four Case Studies,psychology
14,Emotional Security: A New Algorithm,psychology
15,"Onions, Leeks, and Garlic: Cooking Secrets of ...",trad_cook
16,Fifty Years in Buckingham Palace Kitchens,trad_cook
17,"Sushi, Anyone?",trad_cook


### Hint:

In SQL the syntax is:

```sql
SELECT title,type
FROM titles
WHERE type IN('psychology','mod_cook','trad_cook');
```

## 3.&nbsp;Select all the authors from the authors table that do not come from the cities Salt Lake City, Ann Arbor, and Oakland.

In [4]:
# `~` negates the membership mask, mirroring SQL `NOT IN`
excluded_cities = ["Salt Lake City", "Ann Arbor", "Oakland"]
authors.loc[~authors["city"].isin(excluded_cities)]

Unnamed: 0,au_id,au_lname,au_fname,phone,address,city,state,zip,contract
0,172-32-1176,White,Johnson,408 496-7223,10932 Bigge Rd.,Menlo Park,CA,94025,1
2,238-95-7766,Carson,Cheryl,415 548-7723,589 Darwin Ln.,Berkeley,CA,94705,1
3,267-41-2394,O'Leary,Michael,408 286-2428,22 Cleveland Av. #14,San Jose,CA,95128,1
5,341-22-1782,Smith,Meander,913 843-0462,10 Mississippi Dr.,Lawrence,KS,66044,0
6,409-56-7008,Bennet,Abraham,415 658-9932,6223 Bateman St.,Berkeley,CA,94705,1
7,427-17-2319,Dull,Ann,415 836-7128,3410 Blonde St.,Palo Alto,CA,94301,1
8,472-27-2349,Gringlesby,Burt,707 938-6445,PO Box 792,Covelo,CA,95428,1
9,486-29-1786,Locksley,Charlene,415 585-4620,18 Broadway Av.,San Francisco,CA,94130,1
10,527-72-3246,Greene,Morningstar,615 297-2723,22 Graybar House Rd.,Nashville,TN,37215,0
11,648-92-1872,Blotchet-Halls,Reginald,503 745-6402,55 Hillsdale Bl.,Corvallis,OR,97330,1


### Hint:

In SQL the syntax is:

```sql
SELECT *
FROM authors
WHERE city NOT IN ('Salt Lake City', 'Ann Arbor', 'Oakland');
```

## 4.&nbsp;Select all the order numbers with a quantity sold between 25 and 45 from the table sales

In [5]:
# `.between` includes both bounds by default, like SQL `BETWEEN`
qty_in_range = sales["qty"].between(25, 45)
sales.loc[qty_in_range, ["ord_num", "qty"]]

Unnamed: 0,ord_num,qty
5,P2121,40
9,N914014,25
11,P3087a,25
13,P3087a,25
16,X999,35
19,P723,25
20,QA879.1,30


### Hint:

In SQL the syntax is:

```sql
SELECT ord_num, qty
FROM sales
WHERE qty BETWEEN 25 AND 45;
```

## 5.&nbsp;Select all the sales with an order date between 1993-03-11 and 1994-09-13

In [7]:
# Parse ord_date into datetimes so the range filter in the next cell compares
# dates rather than raw text.
# Assign with bracket notation: pandas only supports attribute-style assignment
# for columns that already exist, and otherwise sets a plain Python attribute
# instead of a column.
# NOTE(review): the string bounds used with `.between` are presumably read as
# midnight of that day, so this assumes ord_date carries no time-of-day part — confirm.
sales["ord_date"] = pd.to_datetime(sales["ord_date"])

In [8]:
# Relies on ord_date having been converted with pd.to_datetime in the previous cell
in_date_window = sales["ord_date"].between("1993-03-11", "1994-09-13")
sales.loc[in_date_window]

Unnamed: 0,stor_id,ord_num,ord_date,qty,payterms,title_id
1,6380,722a,1994-09-13,3,Net 60,PS2091
2,7066,A2976,1993-05-24,50,Net 30,PC8888
3,7066,QA7442.3,1994-09-13,75,ON invoice,PS2091
10,7131,P3087a,1993-05-29,20,Net 60,PS1372
11,7131,P3087a,1993-05-29,25,Net 60,PS2106
12,7131,P3087a,1993-05-29,15,Net 60,PS3333
13,7131,P3087a,1993-05-29,25,Net 60,PS7777
14,7896,QQ2299,1993-10-28,15,Net 60,BU7832
15,7896,TQ456,1993-12-12,10,Net 60,MC2222
19,8042,P723,1993-03-11,25,Net 30,BU1111


### Hint:

In SQL the syntax is:

```sql
SELECT *
FROM sales
WHERE ord_date BETWEEN "1993-03-11" AND "1994-09-13";
```

## 6.&nbsp;Select all job descriptions with a maximum level ("max_lvl") between 150 and 200.

In [27]:
# Both bounds (150 and 200) are included by `.between`, like SQL `BETWEEN`
max_lvl_in_range = jobs["max_lvl"].between(150, 200)
jobs.loc[max_lvl_in_range, ["job_desc", "max_lvl"]]

Unnamed: 0,job_desc,max_lvl
6,Marketing Manager,200
7,Public Relations Manager,175
8,Acquisitions Manager,175
9,Productions Manager,165
10,Operations Manager,150


### Hint:

In SQL the syntax is:

```sql
SELECT job_desc, max_lvl
FROM jobs
WHERE max_lvl BETWEEN 150 AND 200;
```