In [1]:
import pandas as pd
import numpy as np
import re
from sklearn.feature_extraction.text import TfidfVectorizer


In [2]:
df = pd.read_csv("tennis_articles_v4.csv")

df.head()


Unnamed: 0,article_id,article_text,source
0,1,Maria Sharapova has basically no friends as te...,https://www.tennisworldusa.org/tennis/news/Mar...
1,2,"BASEL, Switzerland (AP), Roger Federer advance...",http://www.tennis.com/pro-game/2018/10/copil-s...
2,3,Roger Federer has revealed that organisers of ...,https://scroll.in/field/899938/tennis-roger-fe...
3,4,Kei Nishikori will try to end his long losing ...,http://www.tennis.com/pro-game/2018/10/nishiko...
4,5,"Federer, 37, first broke through on tour over ...",https://www.express.co.uk/sport/tennis/1036101...


In [3]:
def split_sentences(text):
    text = re.sub(r'\s+', ' ', text)
    sentences = re.split(r'(?<=[.!?])\s+', text)
    return [s.strip() for s in sentences if len(s.strip()) > 20]


In [4]:
def summarize_text(text, n_sentences=3):
    sentences = split_sentences(text)
    
    if len(sentences) <= n_sentences:
        return sentences
    
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(sentences)
    
    sentence_scores = np.mean(tfidf_matrix.toarray(), axis=1)
    
    top_indices = sentence_scores.argsort()[::-1][:n_sentences]
    top_indices = sorted(top_indices)
    
    summary = [sentences[i] for i in top_indices]
    return summary


In [5]:
for idx, row in df.iterrows():
    print(f"\n=== ARTICLE ID {row['article_id']} ===")
    summary = summarize_text(row['article_text'], n_sentences=2)
    
    for s in summary:
        print("-", s)



=== ARTICLE ID 1 ===
- The Russian player has no problems in openly speaking about it and in a recent interview she said: 'I don't really hide any feelings too much.
- When I'm on the courts or when I'm on the court playing, I'm a competitor and I want to beat every single person whether they're in the locker room or across the net.So I'm not the one to strike up a conversation about the weather and know that in the next few minutes I have to go and try to win a tennis match.

=== ARTICLE ID 2 ===
- BASEL, Switzerland (AP), Roger Federer advanced to the 14th Swiss Indoors final of his career by beating seventh-seeded Daniil Medvedev 6-1, 6-4 on Saturday.
- Copil fired 26 aces past Zverev and never dropped serve, clinching after 2 1/2 hours with a forehand volley winner to break Zverev for the second time in the semifinal.

=== ARTICLE ID 3 ===
- Speaking at the Swiss Indoors tournament where he will play in Sundays final against Romanian qualifier Marius Copil, the world number three 