# **GOOGLE SEARCH WEB SCRAPING - USING SERPAPI APIs**

In [17]:
# use this: `pip install google-search-results`
import os

import pandas as pd
from serpapi import GoogleSearch

**References:**<br>
[what to put in params](https://serpapi.com/search-api)<br>
[example](https://serpapi.com/news-results)<br>
[it's free: you get 100 searches per month](https://serpapi.com/pricing)

In [27]:

# Read the SerpApi key from the environment so the secret is never stored in
# (or committed with) the notebook. Set SERPAPI_API_KEY before starting Jupyter.
api_key = os.environ.get("SERPAPI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the SERPAPI_API_KEY environment variable before running this cell."
    )

# Query parameters for a Google News search (see the references above).
params = {
    "q": "cnbcindonesia",     # search query
    "hl": "id",               # interface language
    "tbm": "nws",             # restrict the search to the News tab
    "num": 20,                # number of results requested
    "location": "Indonesia",  # parameter names are lowercase; "Location" is not a documented key
    "api_key": api_key,
}

search = GoogleSearch(params)
results = search.get_dict()

# A failed request (bad key, exhausted quota, ...) or an empty search has no
# "news_results" entry, so fail with a readable message instead of a KeyError.
news_results = results.get("news_results")
if not news_results:
    raise RuntimeError(
        f"SerpApi returned no news results: {results.get('error', 'unknown error')}"
    )

# let's show just one news item
news_results[0]

https://serpapi.com/search


{'position': 1,
 'link': 'https://www.cnbcindonesia.com/news/20220709111823-4-354240/mantan-menkeu-inggris-maju-mau-gantikan-boris-johnson',
 'title': 'Mantan Menkeu Inggris Maju Mau Gantikan Boris Johnson',
 'source': 'CNBC Indonesia',
 'date': '7 jam lalu',
 'snippet': 'Jakarta, CNBC Indonesia - Mantan menteri keuangan Inggris Rishi Sunak \nmenawarkan dirinya menjadi Perdana Menteri (PM) Inggris yang baru,...',
 'thumbnail': 'https://serpapi.com/searches/62c971553c3fb2b80626fbc4/images/6b39810613d88666c46798f031e4d6dd5265d80839ee9cff1bdea9f0bd048e29.jpeg'}

### **Let's Break It Down: Where Is the Data?**

In [28]:
# Fortunately the structure is straightforward: count the news items we got,
# then look at the fields available on the first one.
total_news = len(news_results)
print(total_news)
news_results[0].keys()

20


dict_keys(['position', 'link', 'title', 'source', 'date', 'snippet', 'thumbnail'])

### **Let's Grab The Data**

In [29]:
# The four fields we care about, in the order they will appear as columns.
news_fields = ['title', 'source', 'date', 'link']

# Pull those fields out of the first news item.
first_news = news_results[0]
title, source, date, link = (first_news[field] for field in news_fields)

# Empty DataFrame with one column per field, to hold the news data later.
df = pd.DataFrame(columns=news_fields)

### **Let's Grab Everything Using Iteration**

In [30]:
# Columns to keep from each news item, in display order.
columns = ['title', 'source', 'date', 'link']

# Collect one plain dict per news item and build the DataFrame in a single call.
# Concatenating inside a loop re-copies the whole frame on every pass, and
# appending to an already-filled `df` duplicates rows whenever the cell is
# re-run; rebuilding from `news_results` keeps this cell idempotent.
# `.get` stores None for a field a result does not have instead of raising KeyError.
records = [{col: item.get(col) for col in columns} for item in news_results]
df = pd.DataFrame(records, columns=columns)
df.head()

Unnamed: 0,title,source,date,link
0,Mantan Menkeu Inggris Maju Mau Gantikan Boris ...,CNBC Indonesia,7 jam lalu,https://www.cnbcindonesia.com/news/20220709111...
1,Ada 45 Emiten Terbitkan Obligasi dan Sukuk Sen...,CNBC Indonesia,9 jam lalu,https://www.cnbcindonesia.com/market/202207090...
2,"Luhut Ngamuk, Minyak Sawit Indonesia Malah Dia...",CNBC Indonesia,3 jam lalu,https://www.cnbcindonesia.com/news/20220709133...
3,Jreengg... Elon Musk Batal Beli Twitter!,CNBC Indonesia,5 jam lalu,https://www.cnbcindonesia.com/market/202207091...
4,Kamu Mau ke Luar Negeri? Booster Dulu! Wajib M...,CNBC Indonesia,8 jam lalu,https://www.cnbcindonesia.com/news/20220709105...


In [31]:
# Save the table as CSV in the current working directory. A relative path keeps
# the notebook runnable on any machine, unlike an absolute path into one user's
# Documents folder.
df.to_csv('googlenews.csv', index=False)