In [6]:
#This will not run on online IDE 
import requests 
from bs4 import BeautifulSoup 
import csv 

# List of URLs to scrape
urls = [
    "https://edition.cnn.com/2023/12/08/economy/november-jobs-report-final/index.html",
    "https://www.cnbc.com/2023/12/08/jobs-report-november-2023-us-payrolls-rose-199000-in-november-unemployment-rate-falls-to-3point7percent.html",
    "https://www.wishtv.com/news/national/the-us-economy-added-199000-jobs-in-november/",
    "https://www.politico.com/news/2023/12/08/november-us-jobs-report-employment-00130793"
]

# Name of the CSV file to save the data
csv_filename = "scraped_data.csv"

# Open the CSV file for writing
with open(csv_filename, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    # Write the header row
    writer.writerow(['URL', 'Content'])

    for url in urls:
        try:
            r = requests.get(url)
            r.raise_for_status()

            soup = BeautifulSoup(r.content, 'html.parser')

            paragraphs = soup.find_all('p')
            all_paragraphs_text = ' '.join(paragraph.get_text(strip=True) for paragraph in paragraphs)

            # Write the URL and the content to the CSV file
            writer.writerow([url, all_paragraphs_text])

        except requests.exceptions.RequestException as e:
            print(f"Error fetching {url}: {e}")

print(f"Data has been written to {csv_filename}")



from transformers import pipeline

import textwrap

def chunk_text(text, max_chunk_size=1024):
    chunks = textwrap.wrap(text, max_chunk_size)
    return chunks

def summarize_chunks(chunks, model_name="facebook/bart-large-cnn"):
    summarizer = pipeline("summarization", model=model_name)
    summaries = [summarizer(chunk)[0]['summary_text'] for chunk in chunks]
    return ' '.join(summaries)

# Main function to summarize long text
def summarize_long_text(text, max_chunk_size=1024):
    chunks = chunk_text(text, max_chunk_size)
    summary = summarize_chunks(chunks)
    return summary

# Example usage
long_article1 = "Markets Fear & Greed Index Latest Market News The US economy notched another solid month of job growth, with an added lift from actors and autoworkers coming off the picket lines. Employers added 199,000 jobs in November, and the unemployment rate dipped to 3.7% from 3.9% the month before, according to Bureau of Labor Statistics data released Friday. “The economy’s still humming along,” Jane Oates, a former Department of Labor official who now is CEO of employment education nonprofit WorkingNation, told CNN. “For the past two weeks, all we’ve heard is doom and gloom about how this is going to be a terrible day. And it was a much better day than was predicted.” Economists were expecting net job gains of 180,000 for the month and for the unemployment rate to hold steady, according to Refinitiv. The labor force participation rate ticked up 0.1 percentage points to 62.8%, returning to its highest level since the onset of the pandemic. The participation rate increase is a “positive underlying context to the unemployment rate decline,” wrote Daniel Zhao, Glassdoor’s lead economist, in commentary issued Friday. The largest employment gains last month came in health care and government, which added an estimated 93,200 and 49,000 jobs, respectively. Manufacturing saw a boost, too, largely because of thereturn of striking autoworkers, which lifted motor vehicles and parts employment by 30,000 jobs. Additionally, the resolution of the Screen Actors Guild strike against Hollywood studios resulted in 17,200 jobs added in the motion picture and sound recording industries. In total, the BLS was anticipating a net gain of 35,000 workers returning after strikes: Theagency estimatedthat 61,000 workers were absent from the labor market due to labor disputes, versus 96,000 the month before. Taking into account those one-time gains, the underlying rate of job growth is likely around 160,000 jobs per month, which aligns with the 2019 average, wrote Julia Pollak, senior economist with ZipRecruiter. November’s jobs number is in line with the strong monthly gains hit in the decade before the pandemic, when 183,000 jobs per month were added. The current rate of job growth also is well above the “neutral rate,” or what’s needed to keep up with population growth. “It is critical to put this data in the proper context,” Joseph Brusuelas, principal and chief economist of RSM US, wrote in a note on Friday. “Given long-term demographic changes and structural transformation of the US economy to keep employment stable, only 75,000 jobs per month need to be created, in contrast with the roughly 200,000 that was the case just over a decade ago.” The biggest declines occurred in the retail trade and temporary help services sectors, which lost 38,400 jobs and 13,600 jobs, respectively. “The [reason for the] reduction of jobs in retail is very similar to the reduction of jobs in other places, except retail hasn’t been able to absorb — and that’s technology,” Oates said. As e-commerce and in-store pick-ups become more engrained in how people shop, that leads to fewer people needed at the brick-and-mortar level. November’s job growth was stronger thanOctober’s unrevised tally of 150,000 jobs added. September’s job gains were revised down to 262,000 from 297,000, according to the BLS. The continued strength in the labor market has helped fuel consumer spending and economic growth, but Federal Reserve officials believe slower demand (and slower wage growth) will help bring down inflation. Friday’s jobs report showed that average hourly earnings rose 0.4% in November from the month before, showing a more accelerated pace of growth than the 0.2% uptick seen in October and the 0.3% expected by economists. Fed Chair Powell: Too early to say when to expect rate cuts On an annual basis, however, wage gains eased to 4% from the 4.1% rate seen a month before. Through November, the economy has added an average of 232,000 jobs per month — far more moderate growth than 2022 and 2021, when an estimated 399,000 and 606,000 jobs were added every month, respectively. Friday’s strong jobs report likely keeps the Fed’s options open, although cooling inflation should mean that another pause is in store when the central bank meets next week, wrote Lydia Boussour, EY senior economist. “Labor market endurance will lead Fed officials to retain some optionality for future rate hikes, if needed,” she wrote. “We expect policymakers will resist talking about rate cuts until early 2024.” Most stock quote data provided by BATS. US market indices are shown in real time, except for the S&P 500 which is refreshed every two minutes. All times are ET. Factset: FactSet Research Systems Inc. All rights reserved. Chicago Mercantile: Certain market data is the property of Chicago Mercantile Exchange Inc. and its licensors. All rights reserved. Dow Jones: The Dow Jones branded indices are proprietary to and are calculated, distributed and marketed by DJI Opco, a subsidiary of S&P Dow Jones Indices LLC and have been licensed for use to S&P Opco, LLC and CNN. Standard & Poor’s and S&P are registered trademarks of Standard & Poor’s Financial Services LLC and Dow Jones is a registered trademark of Dow Jones Trademark Holdings LLC. All content of the Dow Jones branded indices Copyright S&P Dow Jones Indices LLC and/or its affiliates. Fair value provided by IndexArb.com. Market holidays and trading hours provided by Copp Clark Limited. © 2023 Cable News Network. A Warner Bros. Discovery Company. All Rights Reserved.CNN Sans ™ & © 2016 Cable News Network."
long_article2 = "Credit Cards Loans Banking Mortgages Insurance Credit Monitoring Personal Finance Small Business Taxes Help for Low Credit Scores Investing SELECT AllCredit Cards Find the Credit Card for You Best Credit Cards Best Rewards Credit Cards Best Travel Credit Cards Best 0% APR Credit Cards Best Balance Transfer Credit Cards Best Cash Back Credit Cards Best Credit Card Welcome Bonuses Best Credit Cards to Build Credit SELECT AllLoans Find the Best Personal Loan for You Best Personal Loans Best Debt Consolidation Loans Best Loans to Refinance Credit Card Debt Best Loans with Fast Funding Best Small Personal Loans Best Large Personal Loans Best Personal Loans to Apply Online Best Student Loan Refinance SELECT AllBanking Find the Savings Account for You Best High Yield Savings Accounts Best Big Bank Savings Accounts Best Big Bank Checking Accounts Best No Fee Checking Accounts No Overdraft Fee Checking Accounts Best Checking Account Bonuses Best Money Market Accounts Best CDs Best Credit Unions SELECT AllMortgages Best Mortgages Best Mortgages for Small Down Payment Best Mortgages for No Down Payment Best Mortgages with No Origination Fee Best Mortgages for Average Credit Score Adjustable Rate Mortgages Affording a Mortgage SELECT AllInsurance Best Life Insurance Best Homeowners Insurance Best Renters Insurance Best Car Insurance Travel Insurance SELECT AllCredit Monitoring Best Credit Monitoring Services Best Identity Theft Protection How to Boost Your Credit Score Credit Repair Services SELECT AllPersonal Finance Best Budgeting Apps Best Expense Tracker Apps Best Money Transfer Apps Best Resale Apps and Sites Buy Now Pay Later (BNPL) Apps Best Debt Relief SELECT AllSmall Business Best Small Business Savings Accounts Best Small Business Checking Accounts Best Credit Cards for Small Business Best Small Business Loans Best Tax Software for Small Business SELECT AllTaxes Best Tax Software Best Tax Software for Small Businesses Tax Refunds SELECT AllHelp for Low Credit Scores Best Credit Cards for Bad Credit Best Personal Loans for Bad Credit Best Debt Consolidation Loans for Bad Credit Personal Loans if You Don't Have Credit Best Credit Cards for Building Credit Personal Loans for 580 Credit Score or Lower Personal Loans for 670 Credit Score or Lower Best Mortgages for Bad Credit Best Hardship Loans How to Boost Your Credit Score SELECT AllInvesting Best IRA Accounts Best Roth IRA Accounts Best Investing Apps Best Free Stock Trading Platforms Best Robo-Advisors Index Funds Mutual Funds ETFs Bonds  Job creation showed little signs of a letup in November, as payrolls grew even faster than expected and the unemployment rate fell despite signs of a weakening economy. Nonfarm payrolls rose by a seasonally adjusted 199,000 for the month, slightly better than the 190,000 Dow Jones estimate and ahead of the unrevised October gain of 150,000, the Labor Department reported Friday. The numbers were boosted by sizeable gains in government hiring as well as workers returning from strikes in the auto and entertainment industries. The unemployment rate declined to 3.7%, compared with the forecast for 3.9%, as the labor force participation rate edged higher to 62.8%. A more encompassing unemployment rate that includes discouraged workers and those holding part-time positions for economic reasons fell to 7%, a decline of 0.2 percentage point. ""The job market continues to be resilient after a year of dodging recession fears,"" said Daniel Zhao, lead economist at job ratings site Glassdoor. ""Really the one concern that we had coming in today's report was the recent rise in the unemployment rate. So the improvement in unemployment was a welcome relief."" The department's survey of households, used to calculate the unemployment rate, showed much more robust job growth of 747,000 and an addition of 532,000 workers to the labor force. Average hourly earnings, a key inflation indicator, increased by 0.4% for the month and 4% from a year ago. The monthly increase was slightly ahead of the 0.3% estimate, but the yearly rate was in line. Markets showed mixed reaction to the report, with stock market futures modestly negative while Treasury yields surged. ""What we wanted was a strong but moderating labor market, and that's what we saw in the November report,"" said Robert Frick, corporate economist with Navy Federal Credit Union, noting ""healthy job growth, lower unemployment, and decent wage increases. All this points to the labor market reaching a natural equilibrium around 150,000 jobs [per month] next year, which is plenty to continue the expansion, and not enough to trigger a Fed rate hike."" Health care was the biggest growth industry, adding 77,000 jobs. Other big gainers included government (49,000), manufacturing (28,000), and leisure and hospitality (40,000). Heading into the holiday season, retail lost 38,000 jobs, half of which came from department stores. Transportation and warehousing also showed a decline of 5,000. Duration of unemployment fell sharply, dropping to an average 19.4 weeks, the lowest level since February. The report comes at a critical time for the U.S. economy. Though growth defied widespread expectations for a recession this year, most economists expect a sharp slowdown in the fourth quarter and tepid gains in 2024. Gross domestic product is on pace to rise at just a 1.2% annualized pace in the fourth quarter, according to an Atlanta Fed data gauge, and most economists expect growth of around 1% in 2024. Federal Reserve officials are watching the jobs numbers closely as they continue to try to bring down inflation that had been running at a four-decade high but has shown signs of easing. Futures markets pricing strongly points to the Fed halting its rate-hiking campaign and beginning to cut next year, though central bank officials have been more circumspect about what lies ahead. Pricing had been pointing to the first reduction happening in March, though that swung following the jobs report, pushing a higher probability for the first expected cut now to May. The Fed will hold its two-day policy meeting next week, its last of the year, and investors will be looking for clues about how officials view the economy. Policymakers have been aiming to bring the economy in for a soft landing that likely would feature modest growth, a sustainable pace of wage increases and inflation at least receding back to the Fed's 2% target. Consumers hold the key to the U.S. economy, and by most measures they've held up fairly well. Retail sales fell 0.1% in October but were still up 2.5% from the previous year. The numbers are not adjusted for inflation, so they indicate that consumers at least have nearly kept pace with higher prices. A gauge the Fed uses showed inflation running at a 3.5% annual rate in October, excluding food and energy prices. However, there is some worry that the end of Covid-era stimulus payments and the continued pressure from higher interest rates could eat into spending. Net household wealth fell by about $1.3 trillion in the third quarter to about $151 trillion, owing largely to declines in the stock market, according to Fed data released this week. Household debt rose 2.5%, close to the pace where it has been for the past several quarters. Fed officials have been watching wage data closely. Rising prices tend to feed into wages, potentially creating a spiral that can be difficult to control. Don't miss these stories from CNBC PRO: Got a confidential news tip? We want to hear from you. Sign up for free newsletters and get more CNBC delivered to your inbox Get this delivered to your inbox, and more info about our products and services. ©2023CNBC LLC. All Rights Reserved.A Division of NBCUniversal Data is a real-time snapshot *Data is delayed at least 15 minutes. Global Business and Financial News, Stock Quotes, and Market Data and Analysis. Data also provided by"
long_article3 = "Minneapolis (CNN) —The US economy notched another solid month of job growth, with an added lift from actors and autoworkers coming off the picket lines.Employers added 199,000 jobs in November, and the unemployment rate dipped to 3.7% from 3.9% the month before, according to Bureau of Labor Statistics data released Friday.“The economy’s still humming along,” Jane Oates, a former Department of Labor official who now is CEO of employment education nonprofit WorkingNation, told CNN. “For the past two weeks, all we’ve heard is doom and gloom about how this is going to be a terrible day. And it was a much better day than was predicted.”Economists were expecting net job gains of 180,000 for the month and for the unemployment rate to hold steady, according to Refinitiv.The labor force participation rate ticked up 0.1 percentage points to 62.8%, returning to its highest level since the onset of the pandemic. The participation rate increase is a “positive underlying context to the unemployment rate decline,” wrote Daniel Zhao, Glassdoor’s lead economist, in commentary issued Friday.One-time boostsThe largest employment gains last month came in health care and government, which added an estimated 93,200 and 49,000 jobs, respectively. Manufacturing saw a boost, too, largely because of thereturn of striking autoworkers, which lifted motor vehicles and parts employment by 30,000 jobs.Additionally, the resolution of the Screen Actors Guild strike against Hollywood studios resulted in 17,200 jobs added in the motion picture and sound recording industries.In total, the BLS was anticipating a net gain of 35,000 workers returning after strikes: Theagency estimatedthat 61,000 workers were absent from the labor market due to labor disputes, versus 96,000 the month before.Taking into account those one-time gains, the underlying rate of job growth is likely around 160,000 jobs per month, which aligns with the 2019 average, wrote Julia Pollak, senior economist with ZipRecruiter.November’s jobs number is in line with the strong monthly gains hit in the decade before the pandemic, when 183,000 jobs per month were added. The current rate of job growth also is well above the “neutral rate,” or what’s needed to keep up with population growth.“It is critical to put this data in the proper context,” Joseph Brusuelas, principal and chief economist of RSM US, wrote in a note on Friday. “Given long-term demographic changes and structural transformation of the US economy to keep employment stable, only 75,000 jobs per month need to be created, in contrast with the roughly 200,000 that was the case just over a decade ago.”Retail drop-offThe biggest declines occurred in the retail trade and temporary help services sectors, which lost 38,400 jobs and 13,600 jobs, respectively.“The [reason for the] reduction of jobs in retail is very similar to the reduction of jobs in other places, except retail hasn’t been able to absorb — and that’s technology,” Oates said.As e-commerce and in-store pick-ups become more engrained in how people shop, that leads to fewer people needed at the brick-and-mortar level.What this means for the FedNovember’s job growth was stronger thanOctober’s unrevised tally of 150,000 jobs added. September’s job gains were revised down to 262,000 from 297,000, according to the BLS.The continued strength in the labor market has helped fuel consumer spending and economic growth, but Federal Reserve officials believe slower demand (and slower wage growth) will help bring down inflation.Friday’s jobs report showed that average hourly earnings rose 0.4% in November from the month before, showing a more accelerated pace of growth than the 0.2% uptick seen in October and the 0.3% expected by economists.On an annual basis, however, wage gains eased to 4% from the 4.1% rate seen a month before.Through November, the economy has added an average of 232,000 jobs per month — far more moderate growth than 2022 and 2021, when an estimated 399,000 and 606,000 jobs were added every month, respectively.Friday’s strong jobs report likely keeps the Fed’s options open, although cooling inflation should mean that another pause is in store when the central bank meets next week, wrote Lydia Boussour, EY senior economist.“Labor market endurance will lead Fed officials to retain some optionality for future rate hikes, if needed,” she wrote. “We expect policymakers will resist talking about rate cuts until early 2024.” Minneapolis (CNN) —The US economy notched another solid month of job growth, with an added lift from actors and autoworkers coming off the picket lines. Employers added 199,000 jobs in November, and the unemployment rate dipped to 3.7% from 3.9% the month before, according to Bureau of Labor Statistics data released Friday. “The economy’s still humming along,” Jane Oates, a former Department of Labor official who now is CEO of employment education nonprofit WorkingNation, told CNN. “For the past two weeks, all we’ve heard is doom and gloom about how this is going to be a terrible day. And it was a much better day than was predicted.” Economists were expecting net job gains of 180,000 for the month and for the unemployment rate to hold steady, according to Refinitiv. The labor force participation rate ticked up 0.1 percentage points to 62.8%, returning to its highest level since the onset of the pandemic. The participation rate increase is a “positive underlying context to the unemployment rate decline,” wrote Daniel Zhao, Glassdoor’s lead economist, in commentary issued Friday. The largest employment gains last month came in health care and government, which added an estimated 93,200 and 49,000 jobs, respectively. Manufacturing saw a boost, too, largely because of thereturn of striking autoworkers, which lifted motor vehicles and parts employment by 30,000 jobs. Additionally, the resolution of the Screen Actors Guild strike against Hollywood studios resulted in 17,200 jobs added in the motion picture and sound recording industries. In total, the BLS was anticipating a net gain of 35,000 workers returning after strikes: Theagency estimatedthat 61,000 workers were absent from the labor market due to labor disputes, versus 96,000 the month before. Taking into account those one-time gains, the underlying rate of job growth is likely around 160,000 jobs per month, which aligns with the 2019 average, wrote Julia Pollak, senior economist with ZipRecruiter. November’s jobs number is in line with the strong monthly gains hit in the decade before the pandemic, when 183,000 jobs per month were added. The current rate of job growth also is well above the “neutral rate,” or what’s needed to keep up with population growth. “It is critical to put this data in the proper context,” Joseph Brusuelas, principal and chief economist of RSM US, wrote in a note on Friday. “Given long-term demographic changes and structural transformation of the US economy to keep employment stable, only 75,000 jobs per month need to be created, in contrast with the roughly 200,000 that was the case just over a decade ago.” The biggest declines occurred in the retail trade and temporary help services sectors, which lost 38,400 jobs and 13,600 jobs, respectively. “The [reason for the] reduction of jobs in retail is very similar to the reduction of jobs in other places, except retail hasn’t been able to absorb — and that’s technology,” Oates said. As e-commerce and in-store pick-ups become more engrained in how people shop, that leads to fewer people needed at the brick-and-mortar level. November’s job growth was stronger thanOctober’s unrevised tally of 150,000 jobs added. September’s job gains were revised down to 262,000 from 297,000, according to the BLS. The continued strength in the labor market has helped fuel consumer spending and economic growth, but Federal Reserve officials believe slower demand (and slower wage growth) will help bring down inflation. Friday’s jobs report showed that average hourly earnings rose 0.4% in November from the month before, showing a more accelerated pace of growth than the 0.2% uptick seen in October and the 0.3% expected by economists. On an annual basis, however, wage gains eased to 4% from the 4.1% rate seen a month before. Through November, the economy has added an average of 232,000 jobs per month — far more moderate growth than 2022 and 2021, when an estimated 399,000 and 606,000 jobs were added every month, respectively. Friday’s strong jobs report likely keeps the Fed’s options open, although cooling inflation should mean that another pause is in store when the central bank meets next week, wrote Lydia Boussour, EY senior economist. “Labor market endurance will lead Fed officials to retain some optionality for future rate hikes, if needed,” she wrote. “We expect policymakers will resist talking about rate cuts until early 2024.”"
long_article4 = "Employment Friday’s report from the Labor Department showed that the unemployment rate dropped from 3.9% to 3.7%, not far above a five-decade low of 3.4% in April. American employers added nearly 200,000 jobs in November, bolstering optimism that the nation might avoid a recession in its fight against inflation. | Kena Betancur/Getty Images ByAssociated Press 12/08/2023 09:03 AM EST Link Copied The nation’s employers added a solid 199,000 jobs last month and the unemployment rate fell, fresh signs that the economy could achieve an elusive “soft landing,” in which inflation would return to the Federal Reserve’s 2% target without causing a steep recession. Friday’s report from the Labor Department showed that the unemployment rate dropped from 3.9% to 3.7%, not far above a five-decade low of 3.4% in April. The November job gain was a reminder that many employers continue to hire, though last month’s increase was inflated by the return of about 40,000 formerly striking auto workers and actors, who were not at work in October but returned in November. Still, the job market is gradually decelerating along the lines that Fed officials have wanted to see. The Fed has raised its key short-term rate from near zero to about 5.4%, a 22-year peak, leading to higher borrowing rates for consumers and businesses and lower inflation. Despite that headwind, the economy and the job market are still expanding. Layoffs remain historically low. When the Fed meets next week, it is considered sure to keep its benchmark rate unchanged for the third straight time in light of the easing inflation pressures. Most economists and Wall Street traders think the Fed’s next move will be to cut rates, though that might not happen until the second half of 2024. Even modest hiring helps ensure that consumers, who drive most of the economy’s growth, keep spending. Early reports on holiday shopping showed healthy growth in online sales. Many of the most recent economic figures have been encouraging. Companies areadvertising fewer job openings, and Americans are switching jobs less often than they did a year ago, trends that typically slow wage growth and inflation pressures. Hiring is cooling, and price increases have moderated significantly. Still, the number of people receiving unemployment aid, though still low,has risen. And for much of this year, hiring has been concentrated in just a few sectors — notably health care, restaurants and hotels and government — rather than broadly across the economy. Hiring has been cooling as the Fed’s sharp interest rate hikes have raised borrowing costs for consumers and businesses, depressing sales of homes, cars, appliances and other high-priced purchases and investments. For now, most analysts are offering a positive outlook of slower but still steady growth and easing inflation. The economy is expected to expand at a modest 1.5% annual rate in the final three months of this year, down froma scorching 5.2% pace in the July-September quarter. Cooler growth should help bring down inflation while still supporting a modest pace of hiring. The economy is still growing even after the Fed has raised its benchmark rate 11 times, from near zero in March 2022. The aggressive pace of those hikes has made mortgages, auto loans and business borrowing much more expensive. At the same time, inflation has tumbled from apeak of 9.1% in June 2022tojust 3.2% last month. And according to a different inflation measure that the Fed prefers, prices rose at just a 2.5% annual rate in the past six months — not far below the central bank’s target. Such progress has fueled speculation in the financial markets that the Fed could soon cut its benchmark rate, perhaps as early as March. Wall Street traders now expect five rate cuts next year, according to futures prices tracked byCME FedWatch. Most economists envision fewer. Christopher Waller, a key Fed official who typically favors higher rates,buoyed the markets’ expectations last weekwhen he suggested that if inflation kept falling, the Fed could cut rates as early as spring. Fed Chair Jerome Powell, though, pushed back against such speculationlast Friday, when he said it was “premature to conclude” that the Fed has raised its benchmark rate high enough to quell inflation. And it was too soon, he added, to “speculate” about when the Fed might cut rates. But Powell also said interest rates are “well into” restrictive territory, meaning that they’re clearly constraining growth. Many analysts took that remark as a signal that the Fed is done raising rates. Link Copied © 2023 POLITICO LLC"
summary1 = summarize_long_text(long_article1)
summary2 = summarize_long_text(long_article2)
summary3 = summarize_long_text(long_article3)
summary4 = summarize_long_text(long_article4)

print(summary1)
print(summary2)
print(summary3)
print(summary4)

Data has been written to scraped_data.csv


Your max_length is set to 142, but your input_length is only 103. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=51)
Your max_length is set to 142, but your input_length is only 119. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=59)


In [3]:
import scrapy
from bs4 import BeautifulSoup
from scrapy.exceptions import DropItem
from datetime import datetime
# from Crawler.items import MarketItem
import logging

class marketspiderSpider(scrapy.Spider):
    name = "marketspider"
    allowed_domains = ["asiafinancial.com"]
    start_urls = ["https://www.asiafinancial.com/insights/"]

    custom_settings = {
        'FEEDS': {
            'market_data.json ': {'format': 'json', 'overwrite': True},
        }
    }

    def parse(self, response):
        current_datetime = datetime.now().strftime('%Y-%m-%d %H:%M:%S') 
        try:
            soup = BeautifulSoup(response.body, 'html.parser')
            HotNews = soup.select('div.col-md-8 div.tt-post.has-thumbnail.type-6.clearfix.post-430.post.type-post.status-publish.format-standard.has-post-thumbnail.hentry.category-business.category-culture.tag-all.tag-health.tag-politics')

            for Hot in HotNews:
                relative_url = Hot.select_one('div.tt-post-info a.tt-post-title.c-h5')['href']
                yield response.follow(relative_url, callback=self.parse_news_content)

        except Exception as e:
            logging.error('Error processing %s: %s', current_datetime, response.url, str(e))

        print(soup)

Employers added 199,000 jobs in November, and the unemployment rate dipped to 3.7%. The labor force participation rate ticked up 0.1 percentage points to 62.8%, returning to its highest level since the onset of the pandemic. “The economy’s still humming along,” Jane Oates, a former Department of Labor official, said. The largest employment gains last month came in health care and government. Manufacturing saw a boost, too, largely because of thereturn of striking autoworkers. The resolution of the Screen Actors Guild strike against Hollywood studios resulted in 17,200 jobs added in the motion picture and sound recording industries. The current rate of job growth is well above the “neutral rate,” or what’s needed to keep up with population growth. The biggest declines occurred in the retail trade and temporary help services sectors, which lost 38,400 jobs and 13,600 jobs, respectively. Average hourly earnings rose 0.4% in November from the month before. September’s job gains were revise

In [33]:
import os 
import openai as ai

ai.api_key = "somthing here"

# use gpt4-turbo
# should not let the input adn output limit the article

def generate_gpt3_response(user_text, print_output=False):
    """
    Query OpenAI GPT-3 for the specific key and get back a response
    :type user_text: str the user's text to query for
    :type print_output: boolean whether or not to print the raw output JSON
    """
    articles = ai.Completion.create(
        engine='gpt-4-32k',  # Determines the quality, speed, and cost. this gives 32K tokens
        temperature=0.6,            # Level of creativity in the response
        prompt=user_text,           # What the user typed in
        max_tokens=2010,             # Maximum tokens in the prompt AND response
        n=1,                        # The number of completions to generate
        stop=None,                  # An optional setting to control response generation
    )

    # Displaying the output can be helpful if things go wrong
    if print_output:
        print(articles)

    # Return the first choice's text
    return articles.choices[0].text


if __name__ == '__main__':
    prompt = 'Write a formal news article based on the following summaries' + article1 + article2+ article3 + article4 + 
    'The article should incorporate all relevant details from these summaries and present them in a cohesive, fact-checked narrative.' + 
    ' Ensure the article maintains a formal tone throughout and reaches a minimum word count of 1000 words.'
    response = generate_gpt3_response(prompt)
    
    print(response)



American employers added nearly 200,000 jobs in November, providing a glimmer of hope that the economy might avoid a recession in its fight against inflation. According to the latest report from the Labor Department, the unemployment rate dipped from 3.9% to 3.7%, not far above a five-decade low of 3.4% in April.

The job market is gradually decelerating along the lines that Federal Reserve officials have wanted to see. The Federal Reserve has raised its key short-term rate from near zero to about 5.4%, a 22-year peak. Most economists and Wall Street traders think the Fed’s next move will be to cut rates.

The largest employment gains last month came in health care and government, adding an estimated 93,200 and 49,000 jobs, respectively. The resolution of the Screen Actors Guild strike against Hollywood studios resulted in 17,200 jobs added. Manufacturing saw a boost, too, largely because of the return of striking autoworkers.

The biggest declines occurred in the retail trade and te

In [59]:
import mysql.connector
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


# Function to connect to the MySQL database and fetch the last 30 entries
def fetch_last_entries(host, user, password, database):
    # Connect to the MySQL database
    conn = mysql.connector.connect(
        host=host,
        user=user,
        password=password,
        database=database
    )
    cursor = conn.cursor()
    
    # Fetch the latest 30 published posts
    query = "SELECT post_content FROM sim76_posts WHERE post_status = 'publish' ORDER BY post_date DESC LIMIT 100"
    cursor.execute(query)
    results = cursor.fetchall()
    
    # Close the database connection
    cursor.close()
    conn.close()
    
    return [entry[0] for entry in results]

# Function to compare the content of the news articles using cosine similarity
def compare_news_articles(contents):
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(contents)
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
    return cosine_sim

# Database credentials
db_credentials = {
    'host': 'localhost',
    'user': 'root',
    'password': 'Aa12369857826',
    'database': 'testnews'
}

# Fetch the latest 30 news articles content from the database
articles_content = fetch_last_entries(**db_credentials)

if articles_content:
    for i, article in enumerate(articles_content):
        print(f"Article {i+1}:")
        print(article)
        print("-----\n")
    
    # Then compare their contents
    similarity_matrix = compare_news_articles(articles_content)
    # Print the similarity matrix
    print("Cosine Similarity Matrix:")
    print(similarity_matrix)
else:
    print("No content fetched from the database or there are less than 30 articles.")




Article 1:
ASTANA – The Kazakh company Future Nft Technology limited launched a blockchain copyright protection platform dubbed Central Asian Intellectual Property Registry (CARRIP) that registers intellectual property rights and monitors transfer transactions, reported Kazinform on Jan. 31. The platform was launched at the end of 2023 and is gaining popularity among users, said company’s advisor Temirlan Tulegenov. After registering on the platform, users can upload a file and categorize it as an audio recording, a text document, a video, a photo, a drawing, or another file.    The system determines the price and users can receive the certificate with the date and time depending on the size of the file. This certificate serves as proof that users retained their intellectual property on CARRIP.    Kazakhstan has been developing its creative industries in recent years. According to the United Nations Conference on Trade and Development (UNCTAD), this sector includes 14 spheres: design, 

    # Using a more advanced model for sentence embeddings
    # 1. all-mpnet-base-v2 ----- nah
    # 2. all-distilroberta-v1 ------ best
    # 3. bert-base-nli-mean-tokens ----- nah
    # 4. roberta-base-nli-stsb-mean-tokens ----- nah
    #5. distilbert-base-nli-stsb-mean-tokens ----- nah
    # 6. paraphrase-mpnet-base-v2 ----- nah
    # 7. roberta-large-nli-stsb-mean-tokens ----- nah
    # 8. paraphrase-distilroberta-base-v1 ----- nah
    # 9. sentence-t5-xl ----- not bad samilar to 2
    # 10. sentence-t5-xxl  ----- ideal

In [30]:
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import spacy

# Load spaCy's medium-sized English language model for preprocessing
nlp = spacy.load('en_core_web_md')

def preprocess_text(text):
    doc = nlp(text.lower())
    lemmatized_text = ' '.join([token.lemma_ for token in doc if not token.is_stop and not token.is_punct and not token.like_num])
    return lemmatized_text

contents = []
for article in data:
    print("Original content:", article['content'])  # Print the original content
    preprocessed_content = preprocess_text(article['content'])
    print("Preprocessed content:", preprocessed_content)  # Optional: Print the preprocessed content
    contents.append(preprocessed_content)


def compare_news_articles(contents):

    model = SentenceTransformer('LASER')
    embeddings = model.encode(contents)
    cosine_sim = cosine_similarity(embeddings, embeddings)
    return cosine_sim

def find_unique_similar_article_pairs(similarity_matrix, threshold):
    compared_articles = set()  # Keep track of articles that have already been compared
    article_similarities = {}  # Store the unique comparisons

    for i in range(len(similarity_matrix)):
        if i in compared_articles:
            continue  # Skip this article if it has already been compared

        similar_articles = []
        for j in range(i + 1, len(similarity_matrix)):  # Only compare with articles ahead to avoid repetition
            if similarity_matrix[i, j] > threshold:
                similar_articles.append((j + 1, similarity_matrix[i, j]))
                compared_articles.add(j)  # Mark this article as compared

        if similar_articles:
            article_similarities[i + 1] = similar_articles
            compared_articles.add(i)  # Also mark the current article as compared

    return article_similarities

similarity_matrix = compare_news_articles(contents)
print("Cosine Similarity Matrix:")
print(similarity_matrix)

threshold = 0.85  # Adjust this based on your observation
article_similarities = find_unique_similar_article_pairs(similarity_matrix, threshold)
if article_similarities:
    for article, similarities in article_similarities.items():
        similar_articles_str = ", ".join([f"Article {pair[0]} with a similarity score of {pair[1]:.2f}" for pair in similarities])
        print(f"Article {article} is similar to {similar_articles_str}.")
else:
    print("No similar article pairs found above the threshold.")




Preprocessed content: chinese state back hacker experiment openai generative artificial intelligence ai tool gain information rival technology firm key backer microsoft say software giant say state back hacker north korea iran russia openai tool hone skill trick target company announce finding roll blanket ban state back hacking group ai product wednesday    independent violation law violation term service want actor identify track know threat actor kind want access technology microsoft vice president customer security tom burt say blog post microsoft say track hack group affiliate russian military intelligence iran revolutionary guard chinese north korean government large language model llm blog post see technology like user microsoft burt say chinese state back hacker llm seek information global intelligence agency domestic concern notable individual cybersecurity matter topic strategic interest threat actor microsoft note blog llm develop code potential malicious intent translate co



OSError: sentence-transformers/LASER is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`

In [2]:
from sentence_transformers import SentenceTransformer

# This will download and cache the model locally
model = SentenceTransformer('distilbert-base-nli-stsb-mean-tokens')

modules.json: 100%|██████████| 349/349 [00:00<00:00, 379kB/s]
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
config_sentence_transformers.json: 100%|██████████| 116/116 [00:00<?, ?B/s] 
README.md: 100%|██████████| 10.7k/10.7k [00:00<00:00, 10.5MB/s]
sentence_bert_config.json: 100%|██████████| 53.0/53.0 [00:00<00:00, 53.2kB/s]
config.json: 100%|██████████| 612/612 [00:00<00:00, 564kB/s]
pytorch_model.bin: 100%|██████████| 90.9M/90.9M [00:01<00:00, 52.2MB/s]
tokenizer_config.json: 100%|██████████| 350/350 [00:00<00:00, 344kB/s]
vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 985kB/s]
tokenizer.json: 100%|██████████| 466k/466k [00:00<00:00, 657kB/s]
special_tokens_map.json: 100%|██████████| 112/112 [00:00<00:00, 261kB/s]
1_Pooling/config.json: 100%|██████████| 190/190 [0

In [30]:
import mysql.connector
from mysql.connector import Error
import logging
from datetime import datetime

class DatabaseConnector:
    def __init__(self):
        self.conn = None
        try:
            self.conn = mysql.connector.connect(
                host='20.24.22.27',
                database='techtodate_test1',
                user='techtodateuser@localhost',
                password='techTodate'
            )
            if self.conn.is_connected():
                self.cur = self.conn.cursor()
                print('Connected to database')
        except Error as e:
            print(f'Error: {e}')

    def insert_into_db(self, header, subheader, content):
        # Your existing method logic here
        pass

# Usage
db_connector = DatabaseConnector()
db_connector.insert_into_db(header="Test Header", subheader="Test Subheader", content="Test Content")


Connected to database


In [11]:
import mysql.connector
from mysql.connector import Error

try:
    # Establish the connection without SSL
    cnx = mysql.connector.connect(
        user="tech",
        password="Quest111",
        host="wpdata.mysql.database.azure.com",
        database="wordpress"
    )
    
    if cnx.is_connected():
        print("Successfully connected to the database without SSL")
        # Perform database operations here...

except Error as e:
    print(f"Error while connecting to MySQL: {e}")

finally:
    if cnx.is_connected():
        cnx.close()
        print("MySQL connection is closed")


Successfully connected to the database without SSL
MySQL connection is closed
