### Data Acquisition

In [6]:
# Data:
 # Structured data - tabular data - db table excel file, csv files
 # semi structured - {"id": 1 , "name": xyz:, "value": 55}, {id": 1 , "name": xyz, "test_report_data": abc}
 # unstructured data - images, text files, sound, videos, etc

In [None]:
# Data acquisition - Data Cleaning - Data Preprocessing - Model training - Cross validation - Evaluate model - Hyperparameter optimation - deploy for prediction

In [7]:
import pandas as pd

# Read CSV
csv_data = pd.read_csv("saved_data.csv")

csv_data

Unnamed: 0,Name,Age,Gender
0,Alice,25,Female
1,Bob,30,Male
2,Charlie,22,Male


In [9]:
# Read Excel
excel_data = pd.read_excel("saved_data.xlsx",sheet_name="Sheet1")

excel_data

Unnamed: 0,Name,Age,Gender
0,Alice,25,Female
1,Bob,30,Male
2,Charlie,22,Male


In [10]:
# Read Excel
excel_data = pd.read_excel("saved_data.xlsx", sheet_name= ["Sheet1","Sheet2"])

print(excel_data)
print("------------")
print(type(excel_data))
print("------------")
print(excel_data["Sheet1"])
print("------------")
print(type(excel_data["Sheet1"]))

{'Sheet1':       Name  Age  Gender
0    Alice   25  Female
1      Bob   30    Male
2  Charlie   22    Male, 'Sheet2':   Name  Age  Gender
0    A   25  Female
1    B   30    Male
2    C   22    Male}
------------
<class 'dict'>
------------
      Name  Age  Gender
0    Alice   25  Female
1      Bob   30    Male
2  Charlie   22    Male
------------
<class 'pandas.core.frame.DataFrame'>


In [25]:
# Read JSON
json_data = pd.read_json("saved_data.json")

json_data

Unnamed: 0,Name,Age,Gender
0,Alice,25,Female
1,Bob,30,Male
2,Charlie,22,Male


In [12]:
# SQLite Database
from sqlalchemy import create_engine
engine = create_engine("sqlite:///saved_database.db")
sqlite_query = "SELECT * FROM saved_table LIMIT 2"
sqlite_data = pd.read_sql(sqlite_query, engine)
sqlite_data

Unnamed: 0,Name,Age,Gender
0,Alice,25,Female
1,Bob,30,Male


In [35]:
# Getting data from URL
import pandas as pd

# URL of the Iris dataset CSV file hosted on GitHub
csv_url = "https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv"

# Read data from the CSV file URL
data_df = pd.read_csv(csv_url)

data_df


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [45]:
import pandas as pd
import requests

# API URL
api_url = "https://randomuser.me/api/?results=5"

# Fetch data from API
response = requests.get(api_url)

if response.status_code == 200: 
    api_data = response.json()["results"]
    api_df = pd.json_normalize(api_data)
else:
    print("Failed to fetch data from the API")
api_df

Unnamed: 0,gender,email,phone,cell,nat,name.title,name.first,name.last,location.street.number,location.street.name,...,login.sha256,dob.date,dob.age,registered.date,registered.age,id.name,id.value,picture.large,picture.medium,picture.thumbnail
0,male,steven.wells@example.com,07-8889-1957,0492-878-550,AU,Mr,Steven,Wells,2516,College St,...,fdf2bb26f9d5559eee0930de5e3faf23214bbc9e15b101...,1993-05-05T03:39:23.562Z,30,2015-12-30T12:50:30.483Z,7,TFN,353802861,https://randomuser.me/api/portraits/men/81.jpg,https://randomuser.me/api/portraits/med/men/81...,https://randomuser.me/api/portraits/thumb/men/...
1,female,tracy.bowman@example.com,071-190-7718,081-312-5521,IE,Miss,Tracy,Bowman,7497,Patrick Street,...,a40504587af06313f0c4cba435af79d4c74462ef9336ce...,1985-10-25T02:27:44.100Z,37,2017-01-06T05:22:07.784Z,6,PPS,8028725T,https://randomuser.me/api/portraits/women/91.jpg,https://randomuser.me/api/portraits/med/women/...,https://randomuser.me/api/portraits/thumb/wome...
2,female,paula.kihle@example.com,71054766,94226188,NO,Mrs,Paula,Kihle,633,Maria Dehlis vei,...,0f86d82d8cd2503da37773f86e87677a2a60009a10860e...,1963-12-27T00:08:21.673Z,59,2019-07-20T13:09:56.740Z,4,FN,27126305023,https://randomuser.me/api/portraits/women/71.jpg,https://randomuser.me/api/portraits/med/women/...,https://randomuser.me/api/portraits/thumb/wome...
3,male,vist.smik@example.com,(068) X82-3802,(068) F02-8325,UA,Mr,Vist,Smik,5096,Vilnyuskiy provulok,...,00545bc5ed8235f14eb7146095aa3f860e8487a40164c5...,1991-08-15T12:43:19.599Z,31,2017-10-07T14:34:53.916Z,5,,,https://randomuser.me/api/portraits/men/46.jpg,https://randomuser.me/api/portraits/med/men/46...,https://randomuser.me/api/portraits/thumb/men/...
4,female,delphine.smith@example.com,L10 M21-3117,I21 H85-8580,CA,Ms,Delphine,Smith,8660,Parliament St,...,754cf9b376419ca5ca2267e3afd02b9204edd78bf77bd7...,1985-07-17T13:35:19.328Z,38,2019-04-30T21:12:15.526Z,4,SIN,782251268,https://randomuser.me/api/portraits/women/14.jpg,https://randomuser.me/api/portraits/med/women/...,https://randomuser.me/api/portraits/thumb/wome...


In [38]:
#Scrape Data to create Dataframes
import requests
from bs4 import BeautifulSoup
import pandas as pd

# URL of the website for practicing web scraping
url = "http://quotes.toscrape.com"

# Send a GET request to the website
response = requests.get(url)

if response.status_code == 200:
    soup = BeautifulSoup(response.content, "html.parser")

    # Find all quote elements
    quote_elements = soup.find_all("div", class_="quote")

    # Initialize lists to store scraped data
    quotes = []
    authors = []
    tags = []

    # Loop through each quote element and extract data
    for quote_element in quote_elements:
        quote = quote_element.find("span", class_="text").text
        author = quote_element.find("small", class_="author").text
        tags_list = [tag.text for tag in quote_element.find_all("a", class_="tag")]

        quotes.append(quote)
        authors.append(author)
        tags.append(tags_list)

    # Create a DataFrame from the scraped data
    quote_data = {
        "Quote": quotes,
        "Author": authors,
        "Tags": tags
    }

    quotes_df = pd.DataFrame(quote_data)
    print("Scraped Quote Data:")
else:
    print("Error: Unable to fetch data from the website")
quotes_df

Scraped Quote Data:


Unnamed: 0,Quote,Author,Tags
0,“The world as we have created it is a process ...,Albert Einstein,"[change, deep-thoughts, thinking, world]"
1,"“It is our choices, Harry, that show what we t...",J.K. Rowling,"[abilities, choices]"
2,“There are only two ways to live your life. On...,Albert Einstein,"[inspirational, life, live, miracle, miracles]"
3,"“The person, be it gentleman or lady, who has ...",Jane Austen,"[aliteracy, books, classic, humor]"
4,"“Imperfection is beauty, madness is genius and...",Marilyn Monroe,"[be-yourself, inspirational]"
5,“Try not to become a man of success. Rather be...,Albert Einstein,"[adulthood, success, value]"
6,“It is better to be hated for what you are tha...,André Gide,"[life, love]"
7,"“I have not failed. I've just found 10,000 way...",Thomas A. Edison,"[edison, failure, inspirational, paraphrased]"
8,“A woman is like a tea bag; you never know how...,Eleanor Roosevelt,[misattributed-eleanor-roosevelt]
9,"“A day without sunshine is like, you know, nig...",Steve Martin,"[humor, obvious, simile]"


In [3]:
# Streaming data, like data from Sensors

import pandas as pd
import time
import random

# sensor -> 1min sending temperature info to apple server : 12:01 - 40C , 12:02 -42 ... 

    

    
threshold = 95

    

# Create an empty DataFrame with columns
columns = ["Timestamp", "Value"]
stream_df = pd.DataFrame(columns=columns)

# Simulate streaming data
for _ in range(10):
    timestamp = pd.Timestamp.now()
    value = random.randint(0, 100)
    new_row = {"Timestamp": timestamp, "Value": value}
    stream_df.loc[len(stream_df)] = new_row
    print(stream_df)
    forecast(stream_df)
    time.sleep(10)  # Simulate waiting for new data


                   Timestamp  Value
0 2023-08-18 21:05:12.541990     30


TypeError: forecast() takes 0 positional arguments but 1 was given

In [None]:
buld - wipro - phone