# Acquisition Data Preparation: Completed Deals
Here we obtain data for deals which were executed but the target company continued to operate as an independent entity. 

In [2]:
import lseg.data as ld
from lseg.data.content import search
import pandas as pd
from datetime import datetime, timedelta
from IPython.display import display, HTML
import re

In [3]:
# To get organization metadata
ld.open_session()

response = search.metadata.Definition(
    view = search.Views.ORGANISATIONS  # Required parameterc
).get_data()


# Set display options to print the entire DataFrame
pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_colwidth', None)  # Show full content in each cell

# Now, print the entire DataFrame
print(response.data.df)

# Reset display options if you don't want this setting to persist
pd.reset_option('display.max_rows')
pd.reset_option('display.max_columns')
pd.reset_option('display.max_colwidth')

                                                                                              Type  \
ActiveDebtInstrumentCount                    ActiveDebtInstrumentCount                      Double   
ActiveEquityInstrumentCount                  ActiveEquityInstrumentCount                    Double   
ActiveEstimatesExist                         ActiveEstimatesExist                          Boolean   
ActiveInstrumentCount                        ActiveInstrumentCount                          Double   
AddressCity                                  AddressCity                                    String   
AddressLine1                                 AddressLine1                                   String   
AddressLine1Ja                               AddressLine1Ja                                 String   
AddressLine1Zh                               AddressLine1Zh                                 String   
AddressLine2                                 AddressLine2                         

## Gather information for all companies which have a Parent Company
The fact that these companies exist in the companies dataset means that their financials are reported seeperately. These companies also have a strong chance of having been acquired at some point since they have a parent company. 

In [4]:
# Need to execute a seperate search on each industry to avoid API Limits on the number of search results
GicsNames = ['Industrials', 'Consumer Staples', 'Materials', 'Health Care', 'Informational Technology', 'Financials', 'Real Estate', 'Consumer Discretionary', 'Energy', 'Communication Services', 'Utilities']

subsidiaries = [] # Stores the dfs for each industry

# Iterate through each industry and find all companies which have a parent company
for name in GicsNames:

    sub_df = ld.discovery.search(
        view=ld.discovery.Views.ORGANISATIONS,
        select="CommonName, UltimateParentOrganisationName", 
        filter=f"UltimateParentOrganisationName ne null and GicsName eq '{name}'",
        top=10000
    )
    subsidiaries.append(sub_df)

subsidiary_df = pd.concat(subsidiaries, ignore_index=True)
print(len(subsidiary_df))
subsidiary_df.head()

47106


Unnamed: 0,CommonName,UltimateParentOrganisationName
0,International Holding Company PJSC,International Holding Company PJSC
1,General Electric Co,General Electric Co
2,Caterpillar Inc,Caterpillar Inc
3,RTX Corp,RTX Corp
4,Siemens AG,Siemens AG


## As one can see, often the organisation itself is listed as its own ultimate parent. For this reason, we filter the df to exclude all cases in which the parent and company name are the same. 

In [5]:
# Filter rows where CommonName is not the same as UltimateParentOrganisationName
subsidiary_df = subsidiary_df[(subsidiary_df['CommonName'] != subsidiary_df['UltimateParentOrganisationName'])]

subsidiary_df.head()

Unnamed: 0,CommonName,UltimateParentOrganisationName
6,Contemporary Amperex Technology Co Ltd,Xiamen Ruiting Investment Co Ltd
31,Thomson Reuters Corp,Woodbridge Company Ltd
36,LG Energy Solution Ltd,LG Chem Ltd
75,WEG SA,WPA Participacoes e Servicos SA
77,Beijing-Shanghai High Speed Railway Co Ltd,"China, People's Republic of (Government)"


In [6]:
# Get M&A deals metadata
# To get organization metadata
ld.open_session()

response = search.metadata.Definition(
    view = search.Views.DEALS_MERGERS_AND_ACQUISITIONS  # Required parameterc
).get_data()


# Set display options to print the entire DataFrame
pd.set_option('display.max_rows', None)  # Show all rows
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_colwidth', None)  # Show full content in each cell

# Now, print the entire DataFrame
print(response.data.df)

# Reset display options if you don't want this setting to persist
pd.reset_option('display.max_rows')
pd.reset_option('display.max_columns')
pd.reset_option('display.max_colwidth')

                                              Type  Searchable  Sortable  \
AcquirerAdvisorCode                         String        True     False   
AcquirerAdvisorCountry                      String        True     False   
AcquirerAdvisorCusip                        String        True     False   
AcquirerAdvisorName                         String        True     False   
AcquirerAdvisorOAPermId                     String        True     False   
AcquirerAdvisorRegion                       String        True     False   
AcquirerAdvisorRegionCode                   String        True     False   
AcquirerAdvisorRole                         String        True     False   
AcquirerAdvisorSubRegion                    String        True     False   
AcquirerAdvisorSubRegionCode                String        True     False   
AcquirerAdvisorSubRole                      String        True     False   
AcquirerBusinessDescriptionLong             String        True     False   
AcquirerBusi

## Run a search to see if there exists an acquisiiton in the LSEG database where the company names are the same as the parent and subsidiary names from the company search. The goal here is to only find subsidiaries formed through acqusiition as opposed to spin offs or other deals


In [7]:
'''
Remove Additional Qualifiers to Company Names like Inc/LLC
These are often inconsistent pre and post acquisition and the LSEG search finds all matches up to the input
However, we still would like to have a mapping to go back to these names after we execute the search 
'''
def remove_qualifiers_and_create_mapping(df):
    # regex for all of the qualifier suffixes we want to remove
    suffixes = r"( LLC| PLC| Corp| Inc| Co| Ltd)$"
    
    # Create a mapping dictionary
    mapping = {}

    # Apply the cleaning function and populate the mapping dictionary
    for index, row in df.iterrows():

        cleaned_name = re.sub(suffixes, "", row['CommonName'], flags=re.IGNORECASE).strip()
        original_name = row['CommonName']

        # Only add to mapping if not already there
        if cleaned_name not in mapping:
            mapping[cleaned_name] = original_name
        
        # Clean the name in the DataFrame
        df.at[index, 'CommonName'] = cleaned_name

        # Do the same for the UltimateParentOrganisationName
        cleaned_parent_name = re.sub(suffixes, "", row['UltimateParentOrganisationName'], flags=re.IGNORECASE).strip()
        original_parent_name = row['UltimateParentOrganisationName']

        if cleaned_parent_name not in mapping:
            mapping[cleaned_parent_name] = original_parent_name
        
        df.at[index, 'UltimateParentOrganisationName'] = cleaned_parent_name

    return df, mapping

'''
Create a filter which restricts on when the AcquirerName in the M&A Deals datset equals the UltimateParentOrganisationName and the TargetName equals the CommonName
We create a list of 10 filters at a time to avoid the API restriction on the size of the filter
'''
def create_name_filter(df, batch_size):
    filters = []
    
    # Pre-process the names for filtering
    df['CleanedUltimateParent'] = df['UltimateParentOrganisationName'].str.replace("'", "''")
    df['CleanedCommonName'] = df['CommonName'].str.replace("'", "''")

    # Create filters in batches of 10
    for i in range(0, len(df), batch_size):
        batch = df.iloc[i:i + batch_size]
        batch_filter = " or ".join(
            f"(AcquirerName eq '{row['CleanedUltimateParent']}' and TargetName eq '{row['CleanedCommonName']}')"
            for _, row in batch.iterrows()
        )
        filters.append(batch_filter)
        
    return filters


subsidiary_df, name_mapping = remove_qualifiers_and_create_mapping(subsidiary_df)
filter_batches = create_name_filter(subsidiary_df,20)

# List to collect results from all the API calls on sets of 10 name filter pairs
deals = []

to_select = [
    "AcquirerName",
    "TargetName",
    "TransactionEffectiveDate",
    "PercentageOfSharesAcquired",
    "AcquirerRegion",
    "TargetRegion",
    "PricePerShare",
    "CurrencyCodeOfTransaction",
    "DealPurpose",
    "FormOfTransactionName",
    "TransactionStatus",
    "InitialPricePerShare",
    "PremiumAsOn1DayPriorToAnnouncementDate",
    "PremiumAsOn1WeekPriorToAnnouncementDate",
    "PremiumAsOn4WeeksPriorToAnnouncementDate",
    "AcquirerPublicStatus",
    "TargetPublicStatus",
    "AcquirerMidIndustry",
    "TargetMidIndustry",
    "AnalystEstimatedValue",
    "RatioOfTransactionValueToNetIncome",
    "RatioOfTransactionValueToCashFlow",
    "RatioOfTransactionValueToSales",
    "RatioOfRankvalueToNetIncome",
    "RatioOfRankvalueToCashFlow",
    "RatioOfRankvalueToSales",
    "TransactionValueIncludingNetDebtOfTarget"
]

select_string = ", ".join(to_select)

# Execute the search in batches and collect the results
for filter_string in filter_batches:
    deals_df = ld.discovery.search(
        view=search.Views.DEALS_MERGERS_AND_ACQUISITIONS,
        select=select_string,
        filter=filter_string
    )
    deals.append(deals_df)

# Combine all the individual DataFrames into one
combined_deals_df = pd.concat(deals, ignore_index=True)

In [16]:
# Set the maximum number of rows to display
pd.set_option('display.max_rows', None)  # Use None to display all rows

# Now print the TransactionStatus column
print(combined_deals_df['TransactionStatus'])

0              Completed
1              Completed
2              Completed
3              Completed
4                Pending
5              Completed
6              Completed
7              Completed
8                Pending
9                Pending
10             Completed
11               Pending
12             Completed
13             Completed
14             Completed
15             Completed
16             Completed
17             Completed
18             Completed
19               Pending
20             Completed
21             Completed
22             Completed
23             Completed
24             Completed
25             Completed
26               Pending
27             Completed
28             Completed
29       Dismissed Rumor
30             Completed
31             Withdrawn
32             Completed
33             Completed
34             Completed
35               Pending
36             Completed
37             Completed
38               Pending
39             Completed


## Filtering
Need to ensure certain conditions are meant in order to be able to label the deal. Specifically, we need to ensure the transaction was completed, valuation data is accessible, and the deal happened at least three years ago so there is sufficient time to asess target performance after the deal was proposed. 

In [9]:
# Convert 'TransactionEffectiveDate' to datetime if it's not already
combined_deals_df['TransactionEffectiveDate'] = pd.to_datetime(combined_deals_df['TransactionEffectiveDate'], errors='coerce')

# Calculate the date three years prior to today
three_years_prior = pd.Timestamp(datetime.now()) - pd.DateOffset(years=3)

# Filter the DataFrame based on the conditions
filtered_deals_df = combined_deals_df[
    (combined_deals_df['TransactionStatus'] == 'Completed') & 
    (combined_deals_df['PercentageOfSharesAcquired'] != 0.0) & 
    ((combined_deals_df['PricePerShare'].notna()) & (combined_deals_df['PricePerShare'] != 0.0)) |  
    ((combined_deals_df['TransactionValueIncludingNetDebtOfTarget'].notna()) & (combined_deals_df['TransactionValueIncludingNetDebtOfTarget'] != 0.0)) & 
    (combined_deals_df['TransactionEffectiveDate'] < three_years_prior) & 
    (combined_deals_df['TransactionEffectiveDate'].notna())
]

# Display the length of the filtered DataFrame and the first few rows
print(f"Length of flitered_deals_df: {len(filtered_deals_df)}")
filtered_deals_df.head()

Length of flitered_deals_df: 2021


Unnamed: 0,AcquirerName,TargetName,TransactionEffectiveDate,PercentageOfSharesAcquired,AcquirerRegion,TargetRegion,CurrencyCodeOfTransaction,FormOfTransactionName,TransactionStatus,AcquirerPublicStatus,...,DealPurpose,PricePerShare,InitialPricePerShare,PremiumAsOn1DayPriorToAnnouncementDate,PremiumAsOn1WeekPriorToAnnouncementDate,PremiumAsOn4WeeksPriorToAnnouncementDate,RatioOfRankvalueToNetIncome,RatioOfRankvalueToCashFlow,RatioOfRankvalueToSales,AnalystEstimatedValue
2,[Abb Asea Brown Boveri Ltd],[ABB India Ltd],2018-11-02,5.918,Europe,Asia Pacific Excluding Central Asia,RE,Acquisition Of Partial Interest,Completed,Subsidiary,...,[Strengthen operations],20.164,20.164,1970-01-01 00:00:00.000000016,1970-01-01 00:00:00.000000014,1970-01-01 00:00:00.000000006,62.628,34.009,2.897,
3,[Abb Asea Brown Boveri Ltd],[ABB India Ltd],2010-08-13,22.892,Europe,Asia Pacific Excluding Central Asia,RE,Acquisition Of Partial Interest,Completed,Subsidiary,...,[General restructuring of business/operations],19.785,19.785,1970-01-01 00:00:00.000000033,1970-01-01 00:00:00.000000028,1970-01-01 00:00:00.000000005,52.3,31.847,2.974,
6,[Tube Investments of India Ltd],[CG Power and Industrial Solutions Ltd],2020-11-26,56.612,Asia Pacific Excluding Central Asia,Asia Pacific Excluding Central Asia,RE,Acquisition Of Majority Interest,Completed,Public,...,,0.114,0.114,1970-01-01 00:00:00.000000000,1970-01-01 00:00:00.000000020,1970-01-01 00:00:00.000000035,-1.833,57.024,0.775,100.146882
7,[Carl Bennet AB],[Lifco AB (publ)],2000-09-08,78.5,Europe,Europe,SKR,Acquisition Of Majority Interest,Completed,Private,...,,4.872,4.872,1970-01-01 00:00:00.000000004,1970-01-01 00:00:00.000000020,1970-01-01 00:00:00.000000010,15.006,5.257,0.444,
10,[Doosan Enerbility Co Ltd],[Doosan Enerbility Co Ltd],2019-12-06,6.384,Asia Pacific Excluding Central Asia,Asia Pacific Excluding Central Asia,WON,Buyback,Completed,Public,...,[Other],5.461,5.461,1970-01-01 00:00:00.000000000,1969-12-31 23:59:59.999999998,1969-12-31 23:59:59.999999994,-29.279,9.449,0.717,


## Download the Dataset
The next step from here is to label the dataset in retroactive_dcf.ipynb and acquisition_labeling.ipynb

In [10]:
csv_filename = 'executed_acquisition_deals.csv'
filtered_deals_df.to_csv(csv_filename, index=False)

print(f'Data saved to {csv_filename}')

Data saved to executed_acquisition_deals.csv
