# Acquisition Feature Engineering and Labeling
Internally normalized features most likely to contain predictive power are found for each company in the acquisition dataset.
In the labeling section, we compare the results of the retroactive DCF with the valuation at which the deal was executed to determine whether the terms of the deal proved favorable to the target company or the acquiring company. We also evaluate the dataset on a case-by-case basis to eliminate any outliers or likely erroneous data points.

In [15]:
import pandas as pd
import numpy as np
import lseg.data as ld
from lseg.data.content import search
from datetime import date, timedelta



## Load the dataset and configure the new dataset

In [16]:
# Load the acquisition deals from the CSV export.
original_df = pd.read_csv("updated_acquisition_deals.csv")
original_df.head()

# Column schema of the engineered dataset: identifiers, valuations, one
# indicator column per sector, deal ratios, company fundamentals, and the label.
#
# FIX: a comma was missing after "TargetTicker", so Python's implicit string
# concatenation merged it with the next entry into a single bogus column
# "TargetTickerAcquirerName".
#
# NOTE(review): "TransactionAnnouncemntDate" and "NebtDebtToEquity" look like
# misspellings (the source CSV uses "TransactionAnnouncementDate"). They are
# kept byte-for-byte here because other cells may already key on these names;
# rename them together with every consumer.
columns = [
    "TargetName",
    "TargetTicker",
    "AcquirerName",
    "TransactionAnnouncemntDate",
    "DealValuation",
    "RetroactiveDCFValuation",
    "SectorEnergy",
    "SectorMaterials",
    "SectorIndustrials",
    "SectorConsumerDiscretionary",
    "SectorConsumerStaples",
    "SectorHealthCare",
    "SectorFinancials",
    "SectorInformationTechnology",
    "SectorCommunicationServices",
    "SectorUtilities",
    "SectorRealEstate",
    "RatioOfTransactionValueToNetIncome",
    "RatioOfTransactionValueToCashFlow",
    "RatioOfTransactionValueToSales",
    "RatioOfRankvalueToNetIncome",
    "RatioOfRankvalueToCashFlow",
    "RatioOfRankvalueToSales",
    "NebtDebtToEquity",
    "InterestCoverageRatio",
    "ReturnOnEquity",
    "CurrentRatio",
    "ProjectedLongTermGrowthRate",
    "EBITDAMargin",
    "DebtServiceCoverageRatio",
    "Label",
]

# Empty frame with the target schema; rows are appended while iterating the deals.
complete_df = pd.DataFrame(columns=columns)



Unnamed: 0,AcquirerName,TargetName,TransactionAnnouncementDate,PercentageOfSharesAcquired,AcquirerRegion,TargetRegion,PricePerShare,CurrencyCodeOfTransaction,DealPurpose,FormOfTransactionName,...,RatioOfTransactionValueToNetIncome,RatioOfTransactionValueToCashFlow,RatioOfTransactionValueToSales,RatioOfRankvalueToNetIncome,RatioOfRankvalueToCashFlow,RatioOfRankvalueToSales,TransactionValueIncludingNetDebtOfTarget,TransactionEffectiveDate,Ticker,RetroactiveDCFValuation
0,['Hong Kong Exchanges and Clearing Ltd'],['London Stock Exchange Group PLC'],2019-09-11,0,Asia Pacific Excluding Central Asia,Europe,103.287,STG,"['Expand presence in new/foreign markets', 'Cr...",Merger,...,110.412,51.97,25.666,110.412,51.97,25.666,37259.153,2019-09-11,LSEG.L,10522450000.0
1,['Xerox Holdings Corp'],['HP Inc'],2019-11-06,0,Americas,Americas,23.938,US,['Strengthen existing operations/expand presen...,Merger,...,11.742,6.804,0.594,11.961,6.93,0.606,35524.904,2019-11-06,HPQ,53177950000.0
2,['Aon PLC'],['Willis Towers Watson PLC'],2020-03-09,0,Europe,Europe,234.155,STG,"['Concentrate on core businesses/assets', 'Cre...",Merger,...,29.339,14.79,3.389,29.339,14.79,3.389,30140.582,2020-03-09,WTW.O,18538970000.0
3,['Unilever NV'],['Unilever PLC'],2018-03-15,0,Europe,Europe,53.695,STG,['Strengthen existing operations/expand presen...,Merger,...,8.013,4.879,0.881,11.094,6.755,1.22,91563.677,2018-03-15,ULVR.L,103996600000.0
4,['Barrick Gold Corp'],['Newmont Corporation'],2019-02-22,0,Americas,Americas,44.014,US,"['Create synergies, eliminate duplicate servic...",Merger,...,86.512,9.793,3.232,89.523,10.134,3.345,24260.818,2019-02-22,NEM,21815840000.0


## Obtain Company Attribute Using get_history

In [28]:
def get_company_attribute(ticker, field, deal_date):
    """Return the most recent monthly value of ``field`` for ``ticker``.

    Requests monthly history for the 365 days ending at ``deal_date`` and
    reads the last row / last column of the returned frame.

    Args:
        ticker: Instrument identifier passed as ``universe`` to ``ld.get_history``.
        field: Single field code to request (e.g. ``"TR.F.NetDebttoTotEq"``).
        deal_date: End of the look-back window; must support subtracting a
            ``timedelta``.

    Returns:
        The value when it is a Python or NumPy integer/float (a float NaN is
        returned unchanged), otherwise ``None``. ``None`` is also returned when
        the history request yields no rows.
    """
    # Compute the window before opening the session so a bad ``deal_date``
    # cannot leave a session open.
    window_start = deal_date - timedelta(days=365)

    ld.open_session()
    try:
        df = ld.get_history(
            universe=ticker,
            fields=[field],
            interval="monthly",
            start=window_start,
            end=deal_date,
        )
    finally:
        # Release the session even when the request raises.
        ld.close_session()

    # No data for the window: report "no value" instead of failing on iloc.
    if df is None or df.empty:
        return None

    latest_value = df.iloc[-1, -1]  # last row, last column

    # NumPy integer scalars are not instances of ``int``; accept them
    # explicitly so integer-valued fields are not discarded as non-numeric.
    if isinstance(latest_value, (int, float, np.integer, np.floating)):
        return latest_value
    return None
    
# Smoke test: fetch the TR.F.NetDebttoTotEq field for AAPL.O as of today.
print(
    get_company_attribute(
        ticker="AAPL.O",
        field="TR.F.NetDebttoTotEq",
        deal_date=date.today(),
    )
)



0.74371


## Obtain Company Sector
Returns one-hot encoded vector across all possible company sectors

In [27]:
def get_sector_vector(company_name):
    """Return a one-hot list encoding the GICS sector found for ``company_name``.

    Searches the organisations view for rows whose ``CommonName`` equals
    ``company_name`` and reads ``GicsName`` from the first row returned.

    Args:
        company_name: Company common name, interpolated into the search filter.

    Returns:
        A list of 11 ints ordered as the ``sectors`` list below. Exactly one
        entry is 1 when the sector is recognised; all entries are 0 when the
        search returns nothing, the sector value is missing, or the sector is
        not in the list.
    """
    # Sector order (without the "Sector" prefix used in the dataset columns).
    sectors = [
        "Energy",
        "Materials",
        "Industrials",
        "Consumer Discretionary",
        "Consumer Staples",
        "Health Care",
        "Financials",
        "Information Technology",
        "Communication Services",
        "Utilities",
        "Real Estate",
    ]
    one_hot_vector = [0] * len(sectors)

    ld.open_session()
    try:
        # NOTE(review): company_name is interpolated verbatim; a name containing
        # a single quote will likely produce an invalid filter — confirm the
        # escaping rules of the search filter syntax.
        gics_df = ld.discovery.search(
            view=ld.discovery.Views.ORGANISATIONS,
            select="GicsName",
            filter=f"CommonName eq '{company_name}'",
            top=10000,
        )
    finally:
        # Release the session even when the search raises.
        ld.close_session()

    # No match: treat as an unknown sector rather than failing on iloc.
    if gics_df is None or gics_df.empty:
        return one_hot_vector

    raw_sector = gics_df.iloc[0, 0]
    if isinstance(raw_sector, str):
        # A plain string is already the sector name; indexing it with [0]
        # would yield only its first character and never match.
        gics_sector = raw_sector
    else:
        # Sequence-like cell: take its first element, as before.
        try:
            gics_sector = raw_sector[0]
        except (TypeError, IndexError, KeyError):
            # Missing or empty value (e.g. None/NaN/empty list).
            return one_hot_vector

    # Set the matching position; unknown sectors leave the vector all zeros.
    if gics_sector in sectors:
        one_hot_vector[sectors.index(gics_sector)] = 1

    return one_hot_vector

# Smoke test: print the sector vector for Apple Inc.
print(get_sector_vector(company_name="Apple Inc"))

Information Technology
[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]


## Obtain Macroeconomic Data

In [None]:
# TODO: to be implemented later.
def get_macroeconomic_data(deal_date):
    """Placeholder for macroeconomic data lookup.

    Not implemented yet: ``deal_date`` is ignored and ``None`` is returned.
    """
    return None

## Compute The Deal Valuation

In [29]:
def compute_deal_valuation(ticker, share_price, deal_date):
    """Return ``share_price`` multiplied by the latest shares-outstanding figure.

    Requests monthly ``TR.NumberofSharesOutstandingActual`` history for the
    365 days ending at ``deal_date`` and uses the last row / last column of
    the returned frame as the share count.

    Args:
        ticker: Instrument identifier passed as ``universe`` to ``ld.get_history``.
        share_price: Price per share to multiply by the share count.
        deal_date: End of the look-back window; must support subtracting a
            ``timedelta``.

    Returns:
        ``share_price * shares_outstanding``.

    Raises:
        IndexError: If the history request returns no rows.
    """
    # Compute the window before opening the session so a bad ``deal_date``
    # cannot leave a session open.
    window_start = deal_date - timedelta(days=365)

    ld.open_session()
    try:
        df = ld.get_history(
            universe=ticker,
            fields=["TR.NumberofSharesOutstandingActual"],
            interval="monthly",
            start=window_start,
            end=deal_date,
        )
    finally:
        # Release the session even when the request raises.
        ld.close_session()

    # Same exception type an empty frame produced before, with a usable message.
    if df is None or df.empty:
        raise IndexError(
            f"No shares-outstanding history returned for {ticker!r} "
            f"between {window_start} and {deal_date}"
        )

    shares_outstanding = df.iloc[-1, -1]  # last row, last column

    return share_price * shares_outstanding

# Smoke test: value AAPL.O at a share price of 222.91 using today's date.
date_today = date.today()
print(
    compute_deal_valuation(
        ticker="AAPL.O",
        share_price=222.91,
        deal_date=date_today,
    )
)

3434618456450.0


## Loop Through the Original Dataset and Construct the New Data Set

In [None]:
for idx, row in original_df.iterrows():

    # Attributes we need from the original dataframe

    # Get rid of the formatting
    target_name = row["TargetName"]
    acquirer_name = row["AcquirerName"]
    deal_date = 
    ticker = 
    dcf_val = row[""]
    

    history_attribute_codes = ["TR.F.NetDebttoTotEq", "TR.F.IntrCovRatio", "TR.ROEActValue", "TR.F.CurrRatio", "TR.LTGMean", "TR.F.EBITDAMargPct", "TR.F.DebtSrvcCovRatio"]
    history_attributes = np.zeros(len(history_attribute_codes))

    for code in history_attribute_codes:
        
    
    