In [None]:
Creating a web scraper using Node.js, Puppeteer, and JavaScript involves several steps. Puppeteer is a headless browser automation library that can be used to navigate web pages, interact with page elements, and extract data.

Here's a basic example to get you started:

Install Node.js and npm if you haven't already.

Create a new Node.js project and install Puppeteer:

bash
Copy code
npm init -y
npm install puppeteer csv-writer
Create a JavaScript file (e.g., scraper.js) and implement the web scraper:
javascript
Copy code
const puppeteer = require('puppeteer');
const fs = require('fs');
const csvWriter = require('csv-writer').createObjectCsvWriter;

/**
 * Company names to look up, one string per company.
 *
 * `main` walks this list in order and hands each entry to `scrapeLinkedIn`,
 * which types it into the LinkedIn search box. The list ships empty; fill it
 * in before running the script.
 */
const companies = [];

/**
 * Looks up a company on LinkedIn and collects executive contact details.
 *
 * Launches a fresh browser, searches for the company, follows the first
 * search result and reads the CEO/CFO/CXO entries from the resulting page.
 * The browser is always closed before the function settles.
 *
 * @param {string} company - Company name to type into the LinkedIn search box.
 * @returns {Promise<{company: string, websiteLink: string, personnelDetails: Array<{title: string, name: string, email: string, phone: string}>}|undefined>}
 *   The scraped record, or `undefined` when any step fails (the error is
 *   logged with `console.error` rather than rethrown).
 */
async function scrapeLinkedIn(company) {
  const browser = await puppeteer.launch();

  try {
    // Opened inside the try block so the browser is still closed if the tab
    // cannot be created.
    const page = await browser.newPage();

    // Navigate to LinkedIn
    await page.goto('https://www.linkedin.com', { waitUntil: 'domcontentloaded' });

    // Login to LinkedIn (you may need to handle this part manually)

    // Search for the company on LinkedIn
    await page.type('input[placeholder="Search"]', `${company} LinkedIn`);
    // Start waiting for the navigation *before* pressing Enter; otherwise the
    // navigation can complete first and the wait would time out.
    await Promise.all([
      page.waitForNavigation(),
      page.keyboard.press('Enter'),
    ]);

    // Extract company details
    const companyLink = await page.$eval('a.search-result__result-link', (link) => link.href);

    // Navigate to the company's LinkedIn page
    await page.goto(companyLink, { waitUntil: 'domcontentloaded' });
    await page.waitForSelector('.pv4 div a');

    // Extract CEO, CXO, CFO details.
    // This callback is serialized and executed in the page, so everything it
    // needs must be declared inside it.
    const personnelDetails = await page.evaluate(() => {
      const titles = ['CEO', 'CFO', 'CXO']; // Add other titles as needed
      const summaryInfo = 'section.pv-profile-section .pv-entity__position-group-pager:first-of-type .pv-entity__position-group:first-of-type .pv-entity__summary-info';

      const readText = (selector) => {
        const element = document.querySelector(selector);
        return element ? element.textContent.trim() : '';
      };

      // `:contains()` is a jQuery extension, not CSS; querySelector rejects it
      // with a SyntaxError. Match on the heading text instead.
      const headings = Array.from(document.querySelectorAll(`${summaryInfo} h3`));
      const details = [];

      for (const title of titles) {
        const heading = headings.find((node) => node.textContent.includes(title));
        if (heading) {
          details.push({
            title,
            name: heading.textContent.trim(),
            email: readText(`${summaryInfo} span[aria-label="Email"]`),
            phone: readText(`${summaryInfo} span[aria-label="Phone"]`),
          });
        }
      }

      return details;
    });

    return {
      company,
      websiteLink: companyLink,
      personnelDetails,
    };
  } catch (error) {
    console.error('Error:', error);
    // Best-effort scrape: callers receive `undefined` for a failed company.
    return undefined;
  } finally {
    await browser.close();
  }
}

/**
 * Scrapes every entry in `companies` and writes the results to `output.csv`.
 *
 * Companies are processed one at a time. Each scraped person becomes one CSV
 * row carrying the company name and link; a company with no matching
 * personnel still gets a single row with blank personnel columns. Companies
 * whose scrape failed (`scrapeLinkedIn` returned nothing) are skipped.
 *
 * @returns {Promise<void>} Resolves once the CSV writer has finished.
 */
async function main() {
  const rows = [];

  for (const company of companies) {
    const result = await scrapeLinkedIn(company);

    // A failed scrape yields no record; the error was already logged there.
    if (!result) {
      continue;
    }

    const people = Array.isArray(result.personnelDetails) ? result.personnelDetails : [];
    // Keep one row per company even when no executive entry was found.
    const entries = people.length > 0 ? people : [{ title: '', name: '', email: '', phone: '' }];

    for (const person of entries) {
      rows.push({
        company: result.company,
        websiteLink: result.websiteLink,
        title: person.title,
        name: person.name,
        email: person.email,
        phone: person.phone,
      });
    }
  }

  // Write the data to a CSV file.
  // Header ids are flat keys: the writer looks each id up directly on the
  // record, so dotted ids into the personnel array would yield empty columns.
  const csvWriterInstance = csvWriter({
    path: 'output.csv',
    header: [
      { id: 'company', title: 'Company' },
      { id: 'websiteLink', title: 'Website Link' },
      { id: 'title', title: 'Title' },
      { id: 'name', title: 'Name' },
      { id: 'email', title: 'Email' },
      { id: 'phone', title: 'Phone' },
    ],
  });

  await csvWriterInstance.writeRecords(rows);
}

// Run the scraper. Any rejection from main() is reported and turned into a
// non-zero exit code instead of being left as an unhandled promise rejection.
main().catch((error) => {
  console.error('Error:', error);
  process.exitCode = 1;
});




