In [None]:
# Load the course library; `import *` makes its names (e.g. Table) usable without a prefix.
from datascience import *
# numpy, under its conventional short alias.
import numpy as np

# Jupyter magic: draw plots inside the notebook, directly below the cell that creates them.
%matplotlib inline
import matplotlib.pyplot as plots
from datetime import date
# Apply matplotlib's built-in 'fivethirtyeight' style to every plot made afterwards.
plots.style.use('fivethirtyeight')

In [4]:
from datascience import *

# Words of Caution
- Remember to run the cell above. It's for setting up the environment so you can have access to what's needed for this lecture. For now, don't worry about what it means: we'll learn more about what's inside of it in the next few lectures.
- Data science is not just about code, so please don't go over this notebook by itself. Have the relevant textbook sections or lecture video at hand so that you can go over the discussion along with the code. Thank you! 

# Markdown
1. write bold and italic
1. create a list
    1. bullet
    1. numbered
1. create a table
1. write a formula
1. write code
1. add a link
1. add an image

**bold** *italic*

- a
- b
- c

1. num1
2. num2
3. num3

| c1 | c2 |
| -- | -- |
| a  | b  | 

$E = mc^2$

```python
a = 1
b = 2
```

[Link](https://ucsb.instructure.com/courses/15364)

![](https://images.unsplash.com/photo-1608848461950-0fe51dfc41cb?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1974&q=80)

![](https://thumbor.forbes.com/thumbor/fit-in/900x510/https://www.forbes.com/advisor/wp-content/uploads/2023/07/top-20-small-dog-breeds.jpeg.jpg)

# Intro to Python

Let's do some basic math operations on numbers:
- add
- divide
- multiply
- raise to a power
- evaluate inequalities

Let's do some operations on text (aka: string)
- add
- multiply

Optional: Let's do some operations on dates

In [11]:
# / and * have the same precedence and are applied left to right: (4 / 8) * 2.
# Division with / always produces a float, hence 1.0 rather than 1.
4 / 8 *2

1.0

In [12]:
# A comparison evaluates to a boolean value: True or False.
9 > 8

True

In [13]:
# Text (a "string") is written between quotes.
'this is text'

'this is text'

In [18]:
# Double quotes work as well, and let the text itself contain single quotes.
"this is also 'text'"

"this is also 'text'"

In [21]:
# + joins strings end to end; spaces are not added automatically, so they are part of the pieces.
'this' + ' is text' + ' as well'

'this is text as well'

In [22]:
# Multiplying a string by an integer repeats it that many times.
'ha' * 5

'hahahahaha'

In [23]:
# Intentional error: + cannot combine a string with a number, so this raises a TypeError.
'a' + 4

TypeError: can only concatenate str (not "int") to str

In [None]:
# Dividing two integers with / gives a float (0.5), not a rounded integer.
1/2

# Names (aka: variables)
- let's assign some values to variables/names
- let's use the variables
- let's overwrite variables

In [28]:
# Give each quantity a name ...
time = 2
speed = 20

# ... then build a new value out of the existing names.
distance = time * speed
distance  # a bare name on the last line of a cell displays its value

40

In [29]:
# Assigning to an existing name overwrites it: distance now refers to 1 instead of its earlier value.
distance = 1

In [32]:
# A name is not tied to one type of value: the same name can be rebound to a boolean.
distance = True
distance

True

## Why Names?
- Calculate the annual salary for a person working full-time under California minimum wage of 15 USD/hour. 
- On 2023-01-01, the minimum wage was raised to 15.50 USD/hour. Recalculate the annual salary.

In [33]:
# Inputs for the wage calculation, kept together so that a change here flows into every later cell.
salary = 15.0  # USD/h -- despite the name, this is the hourly rate, not a yearly salary

weeks_per_year = 52
hours_per_week = 40

In [34]:
# Hours worked in a full year: hours per week times weeks per year.
# Relies on hours_per_week and weeks_per_year from the cell that defines the wage inputs.
hours_per_year = hours_per_week * weeks_per_year

In [None]:
# Pay for one week: hours worked per week times the hourly rate stored in `salary`.
weekly_wages = hours_per_week * salary
weekly_wages

In [None]:
# Pay for one year: hours worked per year times the hourly rate stored in `salary`.
# Because the rate is a name, changing `salary` and re-running these cells updates the result.
yearly_wages = hours_per_year * salary
yearly_wages

# Comments

# Functions
- let's use some [built-in functions](https://docs.python.org/3/library/functions.html). E.g:
    - absolute value
    - the lower of two values
    - round    
- let's use keywords for arguments
- let's define our own function
- let's use the `help()` function to learn about the usage of functions

In [36]:
# abs() returns the absolute value: the number without its sign.
abs(-4)

4

In [37]:
# min() returns the smallest of its arguments.
min(7, 28)

7

In [57]:
# Arguments passed by keyword (name=value) may be written in any order.
round(ndigits=2, number=41.7543)

41.75

---
slides

---

# Tables A)
[Documentation](http://www.data8.org/datascience/reference-nb/datascience-reference.html#Table-Functions-and-Methods)

1. read a CSV from `data/cones.csv` into a table using `Table.read_table()`
1. show the first n_rows using `show()`
1. select a single column from the table using `select()`
1. select multiple columns from the table using `select()`
1. remove a column from the table using `drop()`
1. subset the table to only chocolate cones using `where()` 
1. sort the cones by price using `sort()`
    1. most expensive first
    1. cheapest first
1. add a new column containing your rating using `with_column()`
    
Remember that you can use the `help()` function or `?` to learn about each function

In [81]:
# Chain two where() calls: keep rows whose Flavor is 'chocolate', then of those keep Color 'light brown'.
# NOTE: `table` is created by the cell that reads data/cones.csv; run that cell first.
table.where('Flavor', 'chocolate').where('Color', 'light brown')

Flavor,Color,Price,Rating
chocolate,light brown,4.75,4


In [85]:
# descending=True puts the highest Price first (most expensive cones at the top).
table.sort('Price', descending=True)

Flavor,Color,Price,Rating
chocolate,dark brown,5.25,3
strawberry,pink,5.25,2
chocolate,dark brown,5.25,5
chocolate,light brown,4.75,4
bubblegum,pink,4.75,1
strawberry,pink,3.55,1


In [87]:
# A trailing ? is a Jupyter/IPython shortcut that shows the signature and docstring of a function.
table.sort?

Signature: table.sort(column_or_label, descending=False, distinct=False)
Docstring:
Return a Table of rows sorted according to the values in a column.

Args:
    ``column_or_label``: the column whose values are used for sorting.

    ``descending``: if True, sorting will be in descending, rather than
        ascending order.

    ``distinct``: if True, repeated values in ``column_or_label`` will
        be omitted.

Returns:
    An instance of ``Table`` containing rows sorted based on the values
    in ``column_or_label``.

>>> marbles = Table().with_columns(
...    "Color", make_array("Red", "Green", "Blue", "Red", "Green", "Green"),
...    "Shape", make_array("Round", "Rectangular", "Rectangular", "Round", "Rectangular", "Round"),
...    "Amount", make_array(4, 6, 12, 7, 9, 2),
...    "Price", make_a

In [78]:
# drop() returns a new table without the 'Color' column; `table` itself is left unchanged.
t = table.drop('Color')
t  # an assignment displays nothing, so name the result on the last line to show it

Flavor,Price,Rating
strawberry,3.55,1
chocolate,4.75,4
chocolate,5.25,3
strawberry,5.25,2
chocolate,5.25,5
bubblegum,4.75,1


In [79]:
# The original table still has its 'Color' column: drop() did not modify it.
table

Flavor,Color,Price,Rating
strawberry,pink,3.55,1
chocolate,light brown,4.75,4
chocolate,dark brown,5.25,3
strawberry,pink,5.25,2
chocolate,dark brown,5.25,5
bubblegum,pink,4.75,1


In [76]:
# read_table is called on the Table class itself; there is no need to build an empty Table() first.
table = Table.read_table('data/cones.csv')
# select() returns a new table holding only the listed columns.
table.select('Flavor', 'Rating')

Flavor,Rating
strawberry,1
chocolate,4
chocolate,3
strawberry,2
chocolate,5
bubblegum,1


# Tables B)
1. read the CSV `data/skyscrapers.csv`
1. show the number of skyscrapers in the dataset using the *attribute* `num_rows`
1. sort the table by completion year. What can we learn?
1. subset the data to skyscrapers in 'Los Angeles'
    1. subset to skyscrapers in `Los Angeles` that were built in the year 1971
1. get data on the 'Empire State Building' in 'New York City'
1. rename the 'completed' column using `relabel()`
1. get all skyscrapers in `New York City` and sort them by when they were built

In [9]:
# read_table is called on the Table class itself; there is no need to build an empty Table() first.
skyscrapers = Table.read_table('data/skyscrapers.csv')
# num_rows is an attribute, not a method, so it is written without parentheses.
skyscrapers.num_rows

200

In [19]:
# Each where() keeps only the rows whose column equals the given value; chaining applies both conditions.
skyscrapers.where('city', 'Los Angeles').where('completed', 1971)

name,material,city,height,completed
City National Tower,steel,Los Angeles,213.06,1971
Paul Hastings Tower,steel,Los Angeles,213.06,1971


In [16]:
# Ascending order (earliest completion year first); `descending` is named explicitly for readability.
skyscrapers.sort('completed', descending=False).show()

name,material,city,height,completed
Metropolitan Life Tower,steel,New York City,213.36,1909
Woolworth Building,steel,New York City,241.4,1913
Terminal Tower,steel,Cleveland,215.8,1928
Chanin Building,steel,New York City,197.8,1929
Mercantile Building,steel,New York City,192.6,1929
Chrysler Building,steel,New York City,318.9,1930
The Trump Building,steel,New York City,282.55,1930
One Grand Central Place,steel,New York City,205.13,1930
Empire State Building,steel,New York City,381.0,1931
Twenty Exchange,steel,New York City,225.86,1931


# Visualizations
1. read the csv `data/movies_by_year.csv`
1. Plot the number of movies vs the total gross using `scatter()`
    - add a trendline
1. Plot the number of movies over time using `plot()`