In [2]:
!pip install pandas



In [3]:
import pandas as pd

# Sample data for practice: one row per customer, every column holds strings.
data = {
    'FullName': ['John Doe', 'Alice Smith', 'Bob Johnson', 'Charlie Brown', 'Eve White'],
    'ProductNames': ['Apple iPhone', 'Samsung Galaxy', 'Nokia Lumia', 'OnePlus Nord', 'Google Pixel'],
    'EmailAddresses': ['##john.doe123@gmail.com', 'alice.smith@yahoo.com', 'bob_johnson@outlook.com', 'charlie.brown@icloud.com', 'eve.white@google.com'],
    'Feedback': ['The product is good', 'Excellent performance', 'Good battery life', 'Bad camera quality', 'Good design'],
    'CityNames': ['new york city', 'los angeles', 'san francisco', 'chicago', 'miami'],
    'Description': ['This phone is amazing with a long-lasting battery', 'Samsung has the best screen quality', 'Nokia Lumia is durable but outdated', 'OnePlus Nord has great value for money', 'Google Pixel has the best camera'],
    'ProductCodes': ['Product123', 'Item4567', 'Device890', 'Gadget1234', 'Mobile5678']
}

# Create the DataFrame and widen the console display so the printed frame is
# not wrapped; long cell values are truncated to 20 characters when printed.
df = pd.DataFrame(data)
pd.set_option("display.width", 1000)
pd.set_option('display.max_colwidth', 20)
print(df)

# Split the full name into two separate columns: FirstName and LastName.
# n=1 splits on the first run of whitespace only, so a name with more than
# two parts still yields exactly two columns instead of raising on assignment.
df[["FirstName", "LastName"]] = df["FullName"].str.split(n=1, expand=True)
print(f"Separate FullName into FirstName and LastName: \n\n{df}")

# Convert all ProductNames to lowercase.
df["ProductNames"] = df["ProductNames"].str.lower()
print(f"Convert all ProductNames to lowercase. \n\n{df}")

# Remove unwanted special characters (like !, #, etc.) from the EmailAddresses
# column, keeping only valid characters. "+" and "-" are kept as well: both are
# legal in addresses, and the Domain pattern at the end of this cell allows "-".
df["EmailAddresses"] = df["EmailAddresses"].str.replace(r"[^A-Za-z0-9@._+-]", "", regex=True)

# Replace all whitespace in the ProductNames column with underscores (_).
df["ProductNames"] = df["ProductNames"].str.replace(r"\s", "_", regex=True)

# Extract only the numbers from the ProductCodes column into a new column
# (every non-digit character is dropped; the result stays a string).
df["Code"] = df["ProductCodes"].str.replace(r"[^0-9]", "", regex=True)

# Check if Feedback contains the word "good" (case-insensitive).
# This is evaluated BEFORE the bad -> good replacement below, so the flag
# describes the original feedback text, not the rewritten one.
df["HasGoodFeedback"] = df["Feedback"].str.contains("good", case=False, na=False)

# Capitalize the first letter of each word in the CityNames column.
df["CityNames"] = df["CityNames"].str.title()

# Replace the word "bad" with "good" in the Feedback column.
# \b restricts the match to the whole word so text such as "badge" is left
# alone, and regex=True is explicit because the default differs between
# pandas versions. Each replacement keeps the capitalization of its match.
df["Feedback"] = (
    df["Feedback"]
    .str.replace(r"\bbad\b", "good", regex=True)
    .str.replace(r"\bBad\b", "Good", regex=True)
)

# Count how many times the word "battery" appears in the Description column
# and store the result in a new column.
df["Battery_word_count"] = df["Description"].str.count("battery")

# Trim any leading and trailing spaces in the Feedback column.
df["Feedback"] = df["Feedback"].str.strip()

# Extract the first 3 characters from each (already lowercased) product name
# in the ProductNames column and store the result in a new column.
df["First_three_chracter"] = df["ProductNames"].str[:3]

# Create a new column StartWithJ that is True if the name in the FullName
# column starts with "J", and False otherwise (missing names count as False).
df["StartWithJ"] = df["FullName"].str.startswith("J", na=False)

# Combine the FirstName and LastName columns into a Names column with the
# format "FirstName LastName".
df["Names"] = df["FirstName"].str.cat(df["LastName"], sep=" ")

# Extract the domain (e.g., "gmail.com") from the EmailAddresses column into a
# new column Domain. expand=False returns a Series for the single capture
# group, which is what a single-column assignment expects.
df["Domain"] = df["EmailAddresses"].str.extract(r"@([A-Za-z0-9.-]+)", expand=False)
df



                                            

                                                    

        FullName    ProductNames       EmailAddresses             Feedback      CityNames          Description ProductCodes
0       John Doe    Apple iPhone  ##john.doe123@gm...  The product is good  new york city  This phone is am...   Product123
1    Alice Smith  Samsung Galaxy  alice.smith@yaho...  Excellent perfor...    los angeles  Samsung has the ...     Item4567
2    Bob Johnson     Nokia Lumia  bob_johnson@outl...    Good battery life  san francisco  Nokia Lumia is d...    Device890
3  Charlie Brown    OnePlus Nord  charlie.brown@ic...   Bad camera quality        chicago  OnePlus Nord has...   Gadget1234
4      Eve White    Google Pixel  eve.white@google...          Good design          miami  Google Pixel has...   Mobile5678
Separate FullName into FirstName and LastName: 

        FullName    ProductNames       EmailAddresses             Feedback      CityNames          Description ProductCodes FirstName LastName
0       John Doe    Apple iPhone  ##john.doe123@gm...  The produ

Unnamed: 0,FullName,ProductNames,EmailAddresses,Feedback,CityNames,Description,ProductCodes,FirstName,LastName,Code,HasGoodFeedback,Battery_word_count,First_three_chracter,StartWithJ,Names,Domain
0,John Doe,apple_iphone,john.doe123@gmai...,The product is good,New York City,This phone is am...,Product123,John,Doe,123,True,1,app,True,John Doe,gmail.com
1,Alice Smith,samsung_galaxy,alice.smith@yaho...,Excellent perfor...,Los Angeles,Samsung has the ...,Item4567,Alice,Smith,4567,False,0,sam,False,Alice Smith,yahoo.com
2,Bob Johnson,nokia_lumia,bob_johnson@outl...,Good battery life,San Francisco,Nokia Lumia is d...,Device890,Bob,Johnson,890,True,0,nok,False,Bob Johnson,outlook.com
3,Charlie Brown,oneplus_nord,charlie.brown@ic...,Good camera quality,Chicago,OnePlus Nord has...,Gadget1234,Charlie,Brown,1234,False,0,one,False,Charlie Brown,icloud.com
4,Eve White,google_pixel,eve.white@google...,Good design,Miami,Google Pixel has...,Mobile5678,Eve,White,5678,True,0,goo,False,Eve White,google.com


In [21]:
import pandas as pd

# Small frame of date strings written as YYYY-MM-DD.
data = {'Date': ['2020-01-15', '2021-02-17', '2022-03-19', '2023-04-21']}
df = pd.DataFrame(data)

# Extract the Year from a Date Column.
# str.extract requires a capture group; the four leading digits are the year.
# expand=False returns a Series for the single group. Year stays a string.
df["Year"] = df["Date"].str.extract(r"^(\d{4})", expand=False)
df


Unnamed: 0,Date,Year
0,2020-01-15,2020
1,2021-02-17,2021
2,2022-03-19,2022
3,2023-04-21,2023


In [25]:
import pandas as pd

# One-column input: the full names to transform.
data = {'FullName': ['John Doe', 'Alice Smith', 'Bob Johnson', 'Charlie Brown']}

# Build the frame and derive UppercaseName (FullName in upper case) in a
# single chained expression.
df = pd.DataFrame(data).assign(
    UppercaseName=lambda frame: frame["FullName"].str.upper()
)
df

Unnamed: 0,FullName,UppercaseName
0,John Doe,JOHN DOE
1,Alice Smith,ALICE SMITH
2,Bob Johnson,BOB JOHNSON
3,Charlie Brown,CHARLIE BROWN


In [35]:
import pandas as pd

# Addresses written as "Street, City, ZipCode".
data = {'Address': ['123 Main St, New York, 10001', '456 Oak Ave, Los Angeles, 90001']}
df = pd.DataFrame(data)

# Split Address into three separate columns: Street, City, and ZipCode.
# The separator is a comma together with any whitespace around it, so City and
# ZipCode do not keep the space that follows each comma in the source text.
df[["Street", "City", "ZipCode"]] = df["Address"].str.split(r"\s*,\s*", expand=True, regex=True)
df

Unnamed: 0,Address,Street,City,ZipCode
0,"123 Main St, New...",123 Main St,New York,10001
1,"456 Oak Ave, Los...",456 Oak Ave,Los Angeles,90001


In [39]:
import pandas as pd

# Free-text product descriptions to search.
data = {'Description': ['This phone is amazing', 'Samsung has the best screen', 'This is a durable phone', 'Laptop is the best']}

# HasPhone is True where the description contains "phone"
# (the match is case-sensitive, as str.contains is by default).
df = pd.DataFrame(data).assign(
    HasPhone=lambda frame: frame["Description"].str.contains("phone")
)
df

Unnamed: 0,Description,HasPhone
0,This phone is am...,True
1,Samsung has the ...,False
2,This is a durabl...,True
3,Laptop is the best,False


In [49]:
import pandas as pd

# Raw phone numbers given as plain ten-digit strings such as "1234567890".
data = {'PhoneNumber': ['1234567890', '9876543210']}

# Create FormattedPhone in the form "123-456-7890": the pattern captures the
# first three digits, the next three, and the remaining digits, and the
# replacement joins the three groups with hyphens.
df = pd.DataFrame(data).assign(
    FormattedPhone=lambda frame: frame["PhoneNumber"].str.replace(
        r"(\d{3})(\d{3})(\d+)",
        r"\1-\2-\3",
        regex=True,
    )
)
df

Unnamed: 0,PhoneNumber,FormattedPhone
0,1234567890,123-456-7890
1,9876543210,987-654-3210


In [59]:
import pandas as pd

# Product codes that mix letters and numbers (e.g., "Product123", "Item4567").
data = {'ProductCode': ['Product123', 'Item4567', 'Device890']}

# ProductNumber holds the first run of digits found in each code, kept as a
# string. expand=False makes extract return a Series for the single group.
df = pd.DataFrame(data).assign(
    ProductNumber=lambda frame: frame["ProductCode"].str.extract(r"(\d+)", expand=False)
)
df

Unnamed: 0,ProductCode,ProductNumber
0,Product123,123
1,Item4567,4567
2,Device890,890


In [79]:
import pandas as pd

# Tweet texts that contain zero or more hashtags.
data = {'Tweets': ['#love the new phone', 'Great #camera features', 'Amazing #design and #performance']}

# Hashtags holds, per tweet, the list of every "#" followed by word
# characters, in order of appearance.
df = pd.DataFrame(data).assign(
    Hashtags=lambda frame: frame["Tweets"].str.findall(r"#\w+")
)
df
                                          

Unnamed: 0,Tweets,Hashtags
0,#love the new phone,[#love]
1,Great #camera fe...,[#camera]
2,Amazing #design ...,"[#design, #perfo..."
