In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Indeed

#### Create Skills Dictionary

In [2]:
skills_keywords_dict = {
                            # tech skills
                            'Excel': ['Excel'],
                            'Python': ['Python'],
                            'R': ['R ', ' R ', 'R,', 'R/'], # 'R' surrounded by spaces and signs
                            'Ruby': ['Ruby'],
                            'Java/Scala': ['Java', 'JVM', 'Scala'],
                            'C/C++': ['C/C++', 'C++', ' C '],   # 'C' surrounded by spaces and signs
                            'MATLAB': ['MATLAB'],
                            'SAS': ['SAS'],
                            'SQL/databases': ['SQL', 'databases'],
                            'SPSS': ['SPSS'],
                            'Stata': ['Stata'],
                            'RapidMiner': ['RapidMiner', 'Rapid Miner'],
                            'Machine Learning': ['Machine Learning', 'ML'],
                            'Data Mining/Analytics': ['Data Mining', 'DM', 'Analytics'],
                            'NLP': ['Natural Language Processing', 'NLP'],
                            'Visualisation': ['Visualisation', 'Visualization'],
                            'Big Data': ['Big Data', 'Spark', 'kafka', 'Hive', 
                                         'beam', 'Hadoop', 'MapReduce', 'Hbase'],
                            'Cloud': ['Cloud', 'AWS', 'GCP', 'Azure ', 'Google Cloud'],
                            'REST': ['REST', 'flask', 'Sinatra'],
                            'Math': ['Algebra', 'Statistics', 'Operations research'],
                            'DevOps': ['DevOps', 'TDD', 'test-driven', 'GitHub'],  
                            
                            # soft skills
                            'communication': ['communication', 'presentation', 'team player'],
                            'problem solving': ['problem solving'],
                            'project_management': ['project management'],
                            'leadership': ['leadership'],
                            'consulting': ['consulting'],
                            'French':['french', 'French']
                        }

#### Get urls from Indeed pages

In [3]:
# 20 job postings per page
def get_urls(url):
    # get html back
    html = requests.get(url)
    html = html.text
    soup = BeautifulSoup(html, 'lxml')

    # extract the number of search results
    nums = soup.find('div', {'id':'searchCount'}).text
    nums = int(nums.split()[3])

    # add the common part between all search pages
    base_url = "https://www.indeed.ca" + soup.find('div', {'class': 'pagination'}).find('a').get('href')[:-2]
    
    urls = []
    urls.append(base_url)
    for i in range(20, nums, 20):
        urls.append(base_url + str(i))
    
    return urls, nums

In [4]:
url = 'https://ca.indeed.com/jobs?q=Data+scientist&l=Canada'
urls, nums = get_urls(url)

#### Get link for each job and its html text

In [5]:
def get_job_links_info(urls):
    
    dic = {}
    # loop over all page-urls
    for page_url in urls:

        # get the HTML of the search results page
        page = requests.get(page_url)
        content = page.text
        soup = BeautifulSoup(content, 'lxml')

        # find all <div> tags containing each job posting links and feed them to the function 'scrape_job_info'
        results = soup.find_all('div',{'class': 'title'})

        for job in results:

            job_link = "https://www.indeed.ca" + job.find('a')['href']

            job_page = requests.get(job_link)
            job_content = job_page.text
            #soup_job = BeautifulSoup(job_content,'lxml')
            #job_desc = soup_job.find('div',{'class':"jobsearch-jobDescriptionText"}).get_text()
            dic[job_link] = job_content
            
    return dic

In [6]:
info_dict = get_job_links_info(urls)

#### Get job_title, company_name, skills from info_dict

In [7]:
def get_skills_from_job(jobs_dict, skills_dict):
    
    results_dict = {} 
    
    # loop over all key(link)-value(HTML code) pairs in scraping results
    for link, job_html_text in jobs_dict.items():
        
    
        soup_job = BeautifulSoup(job_html_text, 'lxml')
        
        results_dict[link] = {} 
        
        # extract job title
        try:
            results_dict[link]['job_title'] = soup_job.find('h3', 
                                {'class':"icl-u-xs-mb--xs icl-u-xs-mt--none jobsearch-JobInfoHeader-title"}).text   
        except IndexError:
            results_dict[link]['job_title'] = 'Not found'
            
         # extract company name
        try:
            results_dict[link]['company_name'] = soup_job.find('div', 
                                                                   {'class': 'icl-u-lg-mr--sm icl-u-xs-mr--xs'}).text 
        except IndexError:
            results_dict[link]['company_name'] = 'Not found'
                
        # search for the skills
        job_text = soup_job.text
        for skill_category, skills in skills_dict.items():
            
            category_found = 0  
            
            for skill in skills:        
                if job_text.find(skill) != -1: 
                    
                    category_found = 1
            
            results_dict[link][skill_category] = category_found 
    
    return results_dict 

In [8]:
results_dict = get_skills_from_job(info_dict, skills_keywords_dict)
df_indeed = pd.DataFrame(results_dict).T.reset_index()

In [29]:
df_indeed.to_csv('indeed.csv')

#### Indeed Example

In [10]:
page = requests.get('https://www.indeed.ca/jobs?q=Data+scientist&l=Canada&start=')
content = page.text
soup = BeautifulSoup(content, 'lxml')
results = soup.find('div',{'class': 'title'})
job_link = "https://www.indeed.ca" + results.find('a')['href']
job_page = requests.get(job_link)
job_content = job_page.text
soup_job = BeautifulSoup(job_content,'lxml')

In [11]:
results

<div class="title">
<a class="jobtitle turnstileLink" data-tn-element="jobTitle" href="/pagead/clk?mo=r&amp;ad=-6NYlbfkN0C7nhuITA1a93dL10dRXTH-RWH4RI4W-baytkusaUeAAtxqSbyONvqD9s0vYo9ZxE_9vfJLiITbWzk-Pd151amlv55zjMAkwFeNXHITobkC5eC1P_i6lXxPblHcR4s7r0pe5I91O5yExkfzKL5XLO0ugsW7oT3uoMzYmm9rvHQYcfeL33EVcawW4SA8oZIkQ8Ep-2yiXMJYAlwsEIErIRGpdaWr-uYCaXfmJhXBxTbP-JC3qixjZuoI4chnN3ma4eBKwXMTZRha8FaynAnnXCJVJ6D7ttZNRgJx0HFmecSM6NtcXaqTZIIbcAmsI2U5_lOGyaxEwxRCs1AwGYPXX7-2ZTQwBIJGBH6E3N34rAe2QuV_lAoEgjhAcl0LRUQD5jQN7Yr8YMjR8vCNySoWIR7Jc-jCAlToG3cM0L9mtO3QgC9Zzbtf3t3I2uwFXWX9ymfCsmYe9BijN2tLMtBHg0hL&amp;p=0&amp;fvj=1&amp;vjs=3" id="sja0" onclick="setRefineByCookie([]); sjoc('sja0', 1); convCtr('SJ'); rclk(this,jobmap[0],true,1);" onmousedown="sjomd('sja0'); clk('sja0'); rclk(this,jobmap[0],1);" rel="noopener nofollow" target="_blank" title="Data Scientist with Backend Experience">
Data <b>Scientist</b> with Backend Experience</a>
</div>

In [12]:
soup.find('div', {'class': 'pagination'}).find('a').get('href')[:-2]

'/jobs?q=Data+scientist&l=Canada&start='

In [13]:
soup_job.find('h3', {'class' : 'icl-u-xs-mb--xs icl-u-xs-mt--none jobsearch-JobInfoHeader-title'}).text

'Data Scientist with Backend Experience'

In [14]:
soup_job.find('div',{'class':"icl-u-lg-mr--sm icl-u-xs-mr--xs"}).text

'Buy With Impact'

In [15]:
soup_job.text.find('SQL') != -1

True

# Linkedin

In [16]:
# 20 job postings per page
def get_urls_linkedin(url):
    # get html back
    html = requests.get(url)
    html = html.text
    soup = BeautifulSoup(html, 'lxml')

    # extract the number of search results
    nums = soup.find('h1').find('span').text
    nums = int(nums)

    # add the common part between all search pages
    base_url = 'https://ca.linkedin.com/jobs/data-scientist-jobs?position=1&pageNum='
    
    urls = []
    for i in range(0, nums, 25):
        urls.append(base_url + str(i))
    
    return urls, nums

In [17]:
url = "https://ca.linkedin.com/jobs/data-scientist-jobs"
urls,nums = get_urls_linkedin(url)

In [18]:
def get_job_links_info_linkedin(urls):
    
    dic = {}
    # loop over all page-urls
    for page_url in urls:

        # get the HTML of the search results page
        page = requests.get(page_url)
        content = page.text
        soup = BeautifulSoup(content, 'lxml')

        # find all <div> tags containing each job posting links and feed them to the function 'scrape_job_info'
        results = soup.find_all('a',{'class':"result-card__full-card-link"})

        for job in results:

            job_link = job['href']

            job_page = requests.get(job_link)
            job_content = job_page.text
            #soup_job = BeautifulSoup(job_content,'lxml')
            #job_desc = soup_job.find('div',{'class':"jobsearch-jobDescriptionText"}).get_text()
            dic[job_link] = job_content
            
    return dic

In [19]:
linkedin_dict = get_job_links_info_linkedin(urls)

In [20]:
len(linkedin_dict)

825

In [21]:
def get_skills_from_job_linkedin(jobs_dict, skills_dict):
    
    results_dict = {} 
    
    # loop over all key(link)-value(HTML code) pairs in scraping results
    for link, job_html_text in jobs_dict.items():
        
    
        soup_job = BeautifulSoup(job_html_text, 'lxml')
        
        results_dict[link] = {} 
        
        # extract job title
        try:
            results_dict[link]['job_title'] = soup_job.find('h1',{'class':'topcard__title'}).text   
        except IndexError:
            results_dict[link]['job_title'] = 'Not found'
            
         # extract company name
        try:
            results_dict[link]['company_name'] = soup_job.find('a',{'class':'topcard__org-name-link topcard__flavor--black-link'}).text
        except AttributeError:
            results_dict[link]['company_name'] = soup_job.find('span',{'class':'topcard__flavor'}).text
                
        # search for the skills
        job_text = soup_job.text
        for skill_category, skills in skills_dict.items():
            
            category_found = 0  
            
            for skill in skills:        
                if job_text.find(skill) != -1: 
                    
                    category_found = 1
            
            results_dict[link][skill_category] = category_found 
    
    return results_dict 

In [22]:
results_dict = get_skills_from_job_linkedin(linkedin_dict, skills_keywords_dict)
df_linkedin = pd.DataFrame(results_dict).T.reset_index()

In [30]:
df_linkedin.to_csv('linkedin.csv')

#### Linkedin Example 

In [96]:
url = "https://ca.linkedin.com/jobs/data-scientist-jobs"
html = requests.get(url)
html = html.text
soup = BeautifulSoup(html, 'lxml')

# extract the number of search results
soup.find('h1').find('span').text

'805'

In [97]:
job_link = soup.find('a',{'class':"result-card__full-card-link"})['href']

In [98]:
job_page = requests.get(job_link)
job_content = job_page.text
soup_job = BeautifulSoup(job_content,'lxml')

In [99]:
soup_job.find('h1',{'class':'topcard__title'}).text

'Data Scientist'

In [100]:
soup_job.find('a',{'class':'topcard__org-name-link topcard__flavor--black-link'}).text

'Visa'

# Coursera

#### Data Science with Python 5

In [172]:
url = 'https://www.coursera.org/specializations/data-science-python#courses'
html = requests.get(url)
html = html.text
soup = BeautifulSoup(html, 'lxml')

In [173]:
a = soup.find_all('div',{'class':'Row_nvwp6p CourseItem'})
links = []
for b in a:
    links.append('https://www.coursera.org' + b.find('a',{'data-e2e':"course-link"})['href'])
links.append('https://www.coursera.org/learn/python-social-network-analysis')

#### IBM data science 9

In [174]:
url = 'https://www.coursera.org/professional-certificates/ibm-data-science#courses'
html = requests.get(url)
html = html.text
soup = BeautifulSoup(html, 'lxml')

In [175]:
a = soup.find_all('div',{'class':'Row_nvwp6p CourseItem'})
for b in a:
    links.append('https://www.coursera.org' + b.find('a',{'data-e2e':"course-link"})['href'])
links.append('https://www.coursera.org/learn/sql-data-science')
links.append('https://www.coursera.org/learn/data-analysis-with-python')
links.append('https://www.coursera.org/learn/python-for-data-visualization')
links.append('https://www.coursera.org/learn/machine-learning-with-python')
links.append('https://www.coursera.org/learn/applied-data-science-capstone')

#### Data Science Specialization (John Hopkins) 10

In [176]:
url = 'https://www.coursera.org/specializations/jhu-data-science'
html = requests.get(url)
html = html.text
soup = BeautifulSoup(html, 'lxml')

In [177]:
a = soup.find_all('div',{'class':'Row_nvwp6p CourseItem'})
for b in a:
    links.append('https://www.coursera.org' + b.find('a',{'data-e2e':"course-link"})['href'])
links.append('https://www.coursera.org/learn/reproducible-research')
links.append('https://www.coursera.org/learn/statistical-inference')
links.append('https://www.coursera.org/learn/regression-models')
links.append('https://www.coursera.org/learn/practical-machine-learning')
links.append('https://www.coursera.org/learn/data-products')
links.append('https://www.coursera.org/learn/data-science-project')

#### Data Science: Statistics and Machine Learning Specialization (John Hopkins) 5

In [178]:
url = 'https://www.coursera.org/specializations/data-science-statistics-machine-learning'
html = requests.get(url)
html = html.text
soup = BeautifulSoup(html, 'lxml')

In [179]:
a = soup.find_all('div',{'class':'Row_nvwp6p CourseItem'})
for b in a:
    links.append('https://www.coursera.org' + b.find('a',{'data-e2e':"course-link"})['href'])
links.append('https://www.coursera.org/learn/data-science-project')

#### Data Engineering, Big Data, and Machine Learning on GCP (Google) 5

In [180]:
url = 'https://www.coursera.org/specializations/data-science-statistics-machine-learning'
html = requests.get(url)
html = html.text
soup = BeautifulSoup(html, 'lxml')

In [181]:
a = soup.find_all('div',{'class':'Row_nvwp6p CourseItem'})
for b in a:
    links.append('https://www.coursera.org' + b.find('a',{'data-e2e':"course-link"})['href'])
links.append('https://www.coursera.org/learn/smart-analytics-machine-learning-ai-gcp')

#### Advanced Data Science with IBM 4

In [182]:
url = 'https://www.coursera.org/specializations/advanced-data-science-ibm'
html = requests.get(url)
html = html.text
soup = BeautifulSoup(html, 'lxml')

In [183]:
a = soup.find_all('div',{'class':'Row_nvwp6p CourseItem'})
for b in a:
    links.append('https://www.coursera.org' + b.find('a',{'data-e2e':"course-link"})['href'])

## Get course names and descriptions

In [170]:
course_names = []
for course_link in links:
    course_html = requests.get(course_link)
    course_html = course_html.text
    course_soup = BeautifulSoup(course_html, 'lxml')
    name = course_soup.find('h1').text
    course_names.append(name)

In [185]:
descs = []
for course_link in links:
    course_html = requests.get(course_link)
    course_html = course_html.text
    course_soup = BeautifulSoup(course_html, 'lxml')
    desc = course_soup.find('div',{'class':'content-inner'}).text
    descs.append(desc)

In [204]:
df_coursera = pd.DataFrame({'Course Names':course_names,'Course Description':descs})

In [205]:
df_coursera

Unnamed: 0,Course Names,Course Description
0,Introduction to Data Science in Python,This course will introduce the learner to the ...
1,"Applied Plotting, Charting & Data Representati...",This course will introduce the learner to info...
2,Applied Machine Learning in Python,This course will introduce the learner to appl...
3,Applied Text Mining in Python,This course will introduce the learner to text...
4,Applied Social Network Analysis in Python,This course will introduce the learner to netw...
5,What is Data Science?,The art of uncovering the insights and trends ...
6,Open Source tools for Data Science,What are some of the most popular data science...
7,Data Science Methodology,Despite the recent increase in computing power...
8,Python for Data Science and AI,This introduction to Python will kickstart you...
9,Databases and SQL for Data Science,Much of the world's data resides in databases....


In [206]:
df_coursera.insert(0, 'Specialization Names', '')

In [208]:
df_coursera.iloc[:5,0] = 'Data Science with Python (UoM)'
df_coursera.iloc[5:14,0] = 'IBM Data Science'
df_coursera.iloc[14:24,0] = 'Data Science (JHU)'
df_coursera.iloc[24:29,0] = 'Data Science: Statistics and Machine Learning Specialization (JHU)'
df_coursera.iloc[29:34,0] = 'Data Engineering, Big Data, and Machine Learning on GCP (Google)'
df_coursera.iloc[34:,0] = 'Advanced Data Science with IBM'

In [209]:
df_coursera

Unnamed: 0,Specialization Names,Course Names,Course Description
0,Data Science with Python (UoM),Introduction to Data Science in Python,This course will introduce the learner to the ...
1,Data Science with Python (UoM),"Applied Plotting, Charting & Data Representati...",This course will introduce the learner to info...
2,Data Science with Python (UoM),Applied Machine Learning in Python,This course will introduce the learner to appl...
3,Data Science with Python (UoM),Applied Text Mining in Python,This course will introduce the learner to text...
4,Data Science with Python (UoM),Applied Social Network Analysis in Python,This course will introduce the learner to netw...
5,IBM Data Science,What is Data Science?,The art of uncovering the insights and trends ...
6,IBM Data Science,Open Source tools for Data Science,What are some of the most popular data science...
7,IBM Data Science,Data Science Methodology,Despite the recent increase in computing power...
8,IBM Data Science,Python for Data Science and AI,This introduction to Python will kickstart you...
9,IBM Data Science,Databases and SQL for Data Science,Much of the world's data resides in databases....


In [210]:
df_coursera.to_csv('coursera.csv')

# Datacamp

In [220]:
url = 'https://www.datacamp.com/search?q=Data%20Science'
html = requests.get(url)
html = html.text
soup = BeautifulSoup(html, 'lxml')

In [224]:
a = soup.find_all('a',{'class':"shim ds-snowplow-search-v2-result-course"})
links_dc = []
for b in a:
    links_dc.append('https://www.datacamp.com' + b['href'])

In [272]:
course_html = requests.get(links_dc[1])
course_html = course_html.text
course_soup = BeautifulSoup(course_html, 'lxml')

In [273]:
course_soup.find('p',{'class':'course__description'}).text

"What is data science and how can you use it to strengthen your organization? This course will teach you about the skills you need on your data team, and how you can structure that team to meet your organization's needs. Data is everywhere! This course will provide you with an understanding of data sources your company can use and how to store that data. You'll also discover ways to analyze and visualize your data through dashboards and A/B tests. To wrap up the course, we'll discuss exciting topics in machine learning, including clustering, time series prediction, natural language processing (NLP), deep learning, and explainable AI! Along the way, you'll learn about a variety of real-world applications of data science and gain a better understanding of these concepts through practical exercises."

In [252]:
list(set(course_soup.find_all('h4',{'class':'chapter__title'})))

[<h4 class="chapter__title">
           Introduction to Data Science
         </h4>, <h4 class="chapter__title">
           Data Collection and Storage
         </h4>, <h4 class="chapter__title">
           Prediction
         </h4>, <h4 class="chapter__title">
           Analysis and Visualization
         </h4>]

In [255]:
list(set(course_soup.find_all('p',{'class':'chapter__description dc-u-fxi-fg-1 dc-u-pr-24 dc-u-pl-24 dc-u-mb-18'})))

[<p class="chapter__description dc-u-fxi-fg-1 dc-u-pr-24 dc-u-pl-24 dc-u-mb-18">
     We'll start the course by defining what data science is. We'll cover the data science workflow, and how data science is applied to real-world business problems. We'll finish the chapter by learning about ways to structure your data team to meet your organization's needs. 
   </p>,
 <p class="chapter__description dc-u-fxi-fg-1 dc-u-pr-24 dc-u-pl-24 dc-u-mb-18">
     In this chapter, we'll discuss ways to explore and visualize data through dashboards. We'll discuss the elements of a dashboard and how to make a directed request for a dashboard. This chapter will also cover making ad hoc data requests and A/B tests, which are a powerful analytics tool that de-risk decision-making. 
   </p>,
 <p class="chapter__description dc-u-fxi-fg-1 dc-u-pr-24 dc-u-pl-24 dc-u-mb-18">
     In this final chapter, we'll discuss the buzziest topic in data science: machine learning! We'll cover supervised and unsupervised m

In [258]:
course_soup.find('h1').text

'Data Science for Business'

In [290]:
course_names,chapter_num = [],[]
for course_link in links_dc:
    course_html = requests.get(course_link)
    course_html = course_html.text
    course_soup = BeautifulSoup(course_html, 'lxml')
    name = course_soup.find('h1').text
    num = len(set(course_soup.find_all('h4',{'class':'chapter__title'})))
    course_names.append(name)
    chapter_num.append(num)

In [277]:
course_descs = []
for course_link in links_dc:
    course_html = requests.get(course_link)
    course_html = course_html.text
    course_soup = BeautifulSoup(course_html, 'lxml')
    try:
        desc = course_soup.find('p',{'class':'course__description'}).text
    except:
        desc = "No Description"
    course_descs.append(desc)

In [356]:
df_dc_courses = pd.DataFrame({'Course Names':course_names,'Number of Chapters':chapter_num,'Course Description':course_descs})
df_dc_courses

Unnamed: 0,Course Names,Number of Chapters,Course Description
0,Data Science for Everyone,4,"What is data science, why is it so popular, an..."
1,Data Science for Business,4,What is data science and how can you use it to...
2,Python Data Science Toolbox (Part 1),3,It's time to push forward and develop your Pyt...
3,Python Data Science Toolbox (Part 2),3,In this second Python Data Science Toolbox cou...
4,Introduction to Data Science in Python,4,Begin your journey into Data Science! Even if ...
5,Data Types for Data Science in Python,5,Have you got your basic Python programming cho...
6,Unit Testing for Data Science in Python,4,Every data science project needs unit testing....
7,Linear Algebra for Data Science in R,4,Linear algebra is one of the most important se...
8,Intermediate Python,5,Learning Python is crucial for any aspiring da...
9,Analyzing Election and Polling Data in R,4,This is an introductory course to the R progra...


In [266]:
chapter_names = []
for course_link in links_dc:
    course_html = requests.get(course_link)
    course_html = course_html.text
    course_soup = BeautifulSoup(course_html, 'lxml')
    a = list(set(course_soup.find_all('h4',{'class':'chapter__title'})))
    for b in a:
        chapter_names.append(b.text.strip())

In [284]:
chapter_descs = []
for course_link in links_dc:
    course_html = requests.get(course_link)
    course_html = course_html.text
    course_soup = BeautifulSoup(course_html, 'lxml')
    a = list(set(course_soup.find_all('p',{'class':'chapter__description dc-u-fxi-fg-1 dc-u-pr-24 dc-u-pl-24 dc-u-mb-18'})))
    for b in a:
        try:
            chapter_descs.append(b.text.strip())
        except AttributeError:
            chapter_descs.append('No Description for This Chapter')

In [357]:
df_dc = pd.DataFrame({'Chapter Names':chapter_names,'Chapter Description':chapter_descs})
df_dc

Unnamed: 0,Chapter Names,Chapter Description
0,Introduction to Data Science,Data preparation is fundamental: data scientis...
1,Data Collection and Storage,We'll start the course by defining what data s...
2,"Preparation, Exploration, and Visualization","In this final chapter, we'll discuss experimen..."
3,Experimentation and Prediction,Now that we understand the data science workfl...
4,Introduction to Data Science,We'll start the course by defining what data s...
...,...,...
192,Classification with XGBoost,Take your XGBoost skills to the next level by ...
193,How to write a function,"Learn how to return early from a function, how..."
194,Case study on grain yields,Apply your function writing skills to a case s...
195,Return values and scope,Learn why writing your own functions is useful...


In [358]:
df_dc.insert(0, 'Course Names', '')

In [359]:
a = 0
for i in range(len(df_dc_courses)):
    num = df_dc_courses.iloc[i,1]
    df_dc.iloc[a:a+num,0] = df_dc_courses.iloc[i,0]
    a += num

In [361]:
df_dc.insert(1,'Course Descriptions','')

In [370]:
b = 0
for i in range(len(df_dc_courses)):
    num =  df_dc_courses.iloc[i,1]
    df_dc.iloc[b:b+num,1] = df_dc_courses.iloc[i,2]
    b += num

In [371]:
df_dc

Unnamed: 0,Course Names,Course Descriptions,Chapter Names,Chapter Description
0,Data Science for Everyone,"What is data science, why is it so popular, an...",Introduction to Data Science,Data preparation is fundamental: data scientis...
1,Data Science for Everyone,"What is data science, why is it so popular, an...",Data Collection and Storage,We'll start the course by defining what data s...
2,Data Science for Everyone,"What is data science, why is it so popular, an...","Preparation, Exploration, and Visualization","In this final chapter, we'll discuss experimen..."
3,Data Science for Everyone,"What is data science, why is it so popular, an...",Experimentation and Prediction,Now that we understand the data science workfl...
4,Data Science for Business,What is data science and how can you use it to...,Introduction to Data Science,We'll start the course by defining what data s...
...,...,...,...,...
192,Extreme Gradient Boosting with XGBoost,Do you know the basics of supervised learning ...,Classification with XGBoost,Take your XGBoost skills to the next level by ...
193,Introduction to Writing Functions in R,Being able to write your own functions makes y...,How to write a function,"Learn how to return early from a function, how..."
194,Introduction to Writing Functions in R,Being able to write your own functions makes y...,Case study on grain yields,Apply your function writing skills to a case s...
195,Introduction to Writing Functions in R,Being able to write your own functions makes y...,Return values and scope,Learn why writing your own functions is useful...


In [373]:
df_dc.to_csv('datacamp.csv')