In [None]:
# How to perform data integration (data merging)
# Customer information dataset
customer_id  name         address       phone_number
1            John Doe     123 Main St   555-555-5555
2            Jane Doe     456 Oak Ave   555-555-5556
3            John Smith   789 Birch Rd  555-555-5557

# Purchase information dataset
purchase_id  customer_id  purchase_date  product
1            1            2022-01-01     T-Shirt
2            2            2022-02-01     Hat
3            1            2022-03-01     Shoes
4            3            2022-04-01     Pants


To integrate (or merge) the two datasets in Python, you can use the pandas library:

1. Import the pandas library.
2. Create the two datasets as pandas DataFrames.
3. Merge the DataFrames on their common column (customer_id).

In [5]:
import pandas as pd

# Customers: one record per person, identified by customer_id
customer_data = pd.DataFrame(
    [
        (1, 'John Doe', '123 Main St', '555-555-5555'),
        (2, 'Jane Doe', '456 Oak Ave', '555-555-5556'),
        (3, 'John Smith', '789 Birch Rd', '555-555-5557'),
    ],
    columns=['customer_id', 'name', 'address', 'phone_number'],
)

# Purchases: one record per order; customer_id links back to a customer
purchase_data = pd.DataFrame(
    [
        (1, 1, '2022-01-01', 'T-Shirt'),
        (2, 2, '2022-02-01', 'Hat'),
        (3, 1, '2022-03-01', 'Shoes'),
        (4, 3, '2022-04-01', 'Pants'),
    ],
    columns=['purchase_id', 'customer_id', 'purchase_date', 'product'],
)

# Pair every purchase with its customer through the shared 'customer_id' key
merged_data = customer_data.merge(purchase_data, on='customer_id')

print(merged_data)

   customer_id        name       address  phone_number  purchase_id  \
0            1    John Doe   123 Main St  555-555-5555            1   
1            1    John Doe   123 Main St  555-555-5555            3   
2            2    Jane Doe   456 Oak Ave  555-555-5556            2   
3            3  John Smith  789 Birch Rd  555-555-5557            4   

  purchase_date  product  
0    2022-01-01  T-Shirt  
1    2022-03-01    Shoes  
2    2022-02-01      Hat  
3    2022-04-01    Pants  


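This merged DataFrame combines each customer's information with their corresponding purchase details. Because `pd.merge` performs an inner join by default, the result has one row per purchase, so John Doe (customer_id 1) appears twice — once for each of his two purchases.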

# If your data is stored in CSV files, as shown below

In [None]:
Customer data (customer_data.csv):
customer_id,name,address,phone_number
1,John Doe,123 Main St,555-555-5555
2,Jane Doe,456 Oak Ave,555-555-5556
3,John Smith,789 Birch Rd,555-555-5557

In [None]:
Purchase data (purchase_data.csv):
purchase_id,customer_id,purchase_date,product
1,1,2022-01-01,T-Shirt
2,2,2022-02-01,Hat
3,1,2022-03-01,Shoes
4,3,2022-04-01,Pants

In [None]:
import pandas as pd

# Load each table from its CSV file into a DataFrame
customer_data = pd.read_csv('customer_data.csv')
purchase_data = pd.read_csv('purchase_data.csv')

# Join purchases onto customers through the shared 'customer_id' key.
# validate='one_to_many' makes pandas raise a MergeError if a customer_id is
# repeated in the customer file, instead of silently duplicating purchase rows
# in the merged result.
merged_data = pd.merge(
    customer_data,
    purchase_data,
    on='customer_id',
    validate='one_to_many',
)

print(merged_data)

# If the data is already in Python dictionaries:

In [7]:
import pandas as pd

# Source data held as plain column-oriented dictionaries
# (each key is a column name, each value is that column's list of entries)
customer_data = {
    'customer_id': [1, 2, 3],
    'name': ['John Doe', 'Jane Doe', 'John Smith'],
    'address': ['123 Main St', '456 Oak Ave', '789 Birch Rd'],
    'phone_number': ['555-555-5555', '555-555-5556', '555-555-5557'],
}
purchase_data = {
    'purchase_id': [1, 2, 3, 4],
    'customer_id': [1, 2, 1, 3],
    'purchase_date': ['2022-01-01', '2022-02-01', '2022-03-01', '2022-04-01'],
    'product': ['T-Shirt', 'Hat', 'Shoes', 'Pants'],
}

# Wrap both dictionaries as DataFrames in a single step
customer_df, purchase_df = map(pd.DataFrame, (customer_data, purchase_data))

# Pair every purchase with its customer through the shared 'customer_id' key
merged_data = customer_df.merge(purchase_df, on='customer_id')

print(merged_data)

   customer_id        name       address  phone_number  purchase_id  \
0            1    John Doe   123 Main St  555-555-5555            1   
1            1    John Doe   123 Main St  555-555-5555            3   
2            2    Jane Doe   456 Oak Ave  555-555-5556            2   
3            3  John Smith  789 Birch Rd  555-555-5557            4   

  purchase_date  product  
0    2022-01-01  T-Shirt  
1    2022-03-01    Shoes  
2    2022-02-01      Hat  
3    2022-04-01    Pants  


Summary of ways to load data into DataFrames before merging:

- CSV/Excel/JSON: if your data is in files, use `pd.read_csv()`, `pd.read_excel()`, or `pd.read_json()` to read it directly into DataFrames.
- SQL databases: use `pd.read_sql_query()` to pull data directly from a database into DataFrames.
- Dictionaries: if the data is in Python dictionaries, convert them to DataFrames with `pd.DataFrame()`.

# If the data is stored in a SQL database:

In [None]:
import pandas as pd
import sqlite3

# Open a connection to the SQLite database file
conn = sqlite3.connect('database.db')

try:
    # Load each table in full into its own DataFrame
    customer_data = pd.read_sql_query("SELECT * FROM customers", conn)
    purchase_data = pd.read_sql_query("SELECT * FROM purchases", conn)
finally:
    # Always close the connection, even if one of the queries raises
    # (for example, when a table is missing); without this the close call
    # would be skipped and the connection would stay open.
    conn.close()