# Topic 03: Control Flows, Functions & Statistics

## Lambdas (for our purposes)

In [1]:
# 1. Sorting!! 

import requests

# Plain-text transcript of Macbeth hosted by Project Gutenberg
MACBETH_URL = 'http://www.gutenberg.org/cache/epub/2264/pg2264.txt'

# Download the transcript.
# - timeout: requests waits indefinitely by default, so a stalled connection
#   would hang this cell forever without it.
# - raise_for_status(): fail loudly on a 4xx/5xx response instead of quietly
#   word-counting the text of an error page.
response = requests.get(MACBETH_URL, timeout=30)
response.raise_for_status()
macbeth = response.text
# Split the transcript into words (split() with no argument splits on any whitespace)
words = macbeth.split()
# Create a dictionary mapping each distinct word to the number of times it appears
word_counts = {}
# Iterate through the text of Macbeth
for word in words:
    # Update word counts: .get(word, 0) returns the previous count, or 0 the first time we see the word
    word_counts[word] = word_counts.get(word, 0) + 1
# Convert to a list of (word, count) tuples so it can be sorted
counts = list(word_counts.items())


In [2]:
# Display the (word, count) tuples built in the previous cell.
# NOTE(review): this shows every pair in the list; slicing (e.g. counts[:10]) would keep the output short.
counts

[('\ufeff', 1),
 ('***The', 2),
 ('Project', 23),
 ("Gutenberg's", 3),
 ('Etext', 5),
 ('of', 395),
 ("Shakespeare's", 6),
 ('First', 4),
 ('Folio***', 2),
 ('********************The', 2),
 ('Tragedie', 6),
 ('Macbeth*********************', 2),
 ('*******************************************************************', 2),
 ('THIS', 4),
 ('EBOOK', 2),
 ('WAS', 1),
 ('ONE', 1),
 ('OF', 9),
 ('PROJECT', 6),
 ("GUTENBERG'S", 1),
 ('EARLY', 1),
 ('FILES', 1),
 ('PRODUCED', 1),
 ('AT', 1),
 ('A', 29),
 ('TIME', 1),
 ('WHEN', 1),
 ('PROOFING', 1),
 ('METHODS', 1),
 ('AND', 1),
 ('TOOLS', 1),
 ('WERE', 1),
 ('NOT', 6),
 ('WELL', 1),
 ('DEVELOPED.', 1),
 ('THERE', 1),
 ('IS', 2),
 ('AN', 1),
 ('IMPROVED', 1),
 ('EDITION', 1),
 ('TITLE', 1),
 ('WHICH', 1),
 ('MAY', 3),
 ('BE', 2),
 ('VIEWED', 1),
 ('AS', 2),
 ('(#1533)', 1),
 ('at', 54),
 ('https://www.gutenberg.org/ebooks/1533', 1),
 ('This', 34),
 ('is', 185),
 ('our', 116),
 ('3rd', 1),
 ('edition', 2),
 ('most', 25),
 ('these', 30),
 ('plays.'

In [3]:
# Rank the (word, count) pairs from most to least frequent, then keep the first 25
top_25 = sorted(
    counts,
    key=lambda pair: pair[1],  # pair[1] is the second element of each tuple, i.e. the count
    reverse=True,              # largest counts first
)[:25]

In [4]:
# 2. Maps and filters

# Sample data for the examples below: the integers 1 through 6
l = list(range(1, 7))

In [5]:
# map() calls the lambda once per element of l; list() gathers the results into a list
list(map(lambda x: x + 2, l)) # MAPPING x + 2 to every element of the list

[3, 4, 5, 6, 7, 8]

In [6]:
# The lambda is True when x is even (remainder 0 after dividing by 2), so only even elements survive
list(filter(lambda x: x%2 == 0, l)) # FILTERING and only keeping elements that satisfy the condition

[2, 4, 6]

In [None]:
# 3. Pandas - we'll get there in Section 4/5!

## Making Lists from Lists
List comprehension (and dictionary comprehension!)

In [7]:
# The long way: start from an empty list and append x + 2 for every element of l
new = []
for x in l:
    new.append(x+2)
# Bare name as the last line so the notebook displays the finished list
new

[3, 4, 5, 6, 7, 8]

In [8]:
# List comprehension: builds the list of x + 2 for every x in l in a single expression
[x+2 for x in l]

[3, 4, 5, 6, 7, 8]

In [9]:
# Dictionary comprehension: each element of l becomes a key, mapped to that element plus 1
{x: x+1 for x in l}

{1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7}

In [10]:
# The trailing `if` acts as a filter: odd elements are skipped, so the result has fewer items than l
[x+2 for x in l if x%2==0] # adding a condition to only do x+2 if x is even

[4, 6, 8]

## That Soccer Dictionary

Nested dictionaries are a very common data structure!

In [None]:
# Data for one soccer match: a list with one dictionary per team (two in total).
# Each team dictionary has these keys:
#   home_team, away_team  -> booleans
#   country               -> string
#   num_passes, passes_completed, fouls_committed -> integers
#   colors                -> list of strings
#   players               -> list of 11 dictionaries, each with
#                            name (str), captain (bool), shirt_number (int), position (str)
soccer_match = [
  { "home_team": True,
    "away_team": False,
    "country": "France",
    "num_passes": 484,
    "passes_completed": 423,
    "fouls_committed": 16,
    "colors": ["blue", "white", "red"],
    "players": [
      {
        "name": "Hugo LLORIS",
        "captain": True,
        "shirt_number": 1,
        "position": "Goalie"
      },
      {
        "name": "Benjamin PAVARD",
        "captain": False,
        "shirt_number": 2,
        "position": "Defender"
      },
      {
        "name": "Raphael VARANE",
        "captain": False,
        "shirt_number": 4,
        "position": "Defender"
      },
      {
        "name": "Samuel UMTITI",
        "captain": False,
        "shirt_number": 5,
        "position": "Defender"
      },
      {
        "name": "Paul POGBA",
        "captain": False,
        "shirt_number": 6,
        "position": "Midfield"
      },
      {
        "name": "Antoine GRIEZMANN",
        "captain": False,
        "shirt_number": 7,
        "position": "Forward"
      },
      {
        "name": "Kylian MBAPPE",
        "captain": False,
        "shirt_number": 10,
        "position": "Forward"
      },
      {
        "name": "Ousmane DEMBELE",
        "captain": False,
        "shirt_number": 11,
        "position": "Forward"
      },
      {
        "name": "Corentin TOLISSO",
        "captain": False,
        "shirt_number": 12,
        "position": "Midfield"
      },
      {
        "name": "Ngolo KANTE",
        "captain": False,
        "shirt_number": 13,
        "position": "Midfield"
      },
      {
        "name": "Lucas HERNANDEZ",
        "captain": False,
        "shirt_number": 21,
        "position": "Defender"
      }
    ],
  },
  { "home_team": False,
    "away_team": True,
    "country": "Australia",
    "num_passes": 390,
    "passes_completed": 332,
    "fouls_committed": 19,
    "colors": ["green", "gold"],
    "players": [
      {
        "name": "Mathew RYAN",
        "captain": False,
        "shirt_number": 1,
        "position": "Goalie"
      },
      {
        "name": "Mark MILLIGAN",
        "captain": False,
        "shirt_number": 5,
        "position": "Defender"
      },
      {
        "name": "Mathew LECKIE",
        "captain": False,
        "shirt_number": 7,
        "position": "Forward"
      },
      {
        "name": "Robbie KRUSE",
        "captain": False,
        "shirt_number": 10,
        "position": "Forward"
      },
      {
        "name": "Andrew NABBOUT",
        "captain": False,
        "shirt_number": 11,
        "position": "Forward"
      },
      {
        "name": "Aaron MOOY",
        "captain": False,
        "shirt_number": 13,
        "position": "Midfield"
      },
      {
        "name": "Mile JEDINAK",
        "captain": True,
        "shirt_number": 15,
        "position": "Midfield"
      },
      {
        "name": "Aziz BEHICH",
        "captain": False,
        "shirt_number": 16,
        "position": "Defender"
      },
      {
        "name": "Joshua RISDON",
        "captain": False,
        "shirt_number": 19,
        "position": "Defender"
      },
      {
        "name": "Trent SAINSBURY",
        "captain": False,
        "shirt_number": 20,
        "position": "Defender"
      },
      {
        "name": "Tom ROGIC",
        "captain": False,
        "shirt_number": 23,
        "position": "Midfield"
      }
    ]
  }
]

In [None]:
# Explore the data and its schema.
# First the container itself: its type and how many entries it holds (one per output line)...
print(type(soccer_match), len(soccer_match), sep="\n")

# ...then the first entry in full, to see which keys and value types it carries
print(soccer_match[0])


#### Write a function that counts how many players have even shirt numbers:

#### Write a function that appends player names and shirt numbers to a list until the sum of shirt numbers reaches 100:

Try it two ways, using (1) a while loop and (2) a for loop with a break.

## Statistics

* Measures of central tendency
    * What are some situations you might want to use mean vs median vs mode?
    
* Measures of dispersion
    * Quartiles/Percentiles
    * Variance & standard deviation