In [1]:
import os
from datetime import datetime

from classes.PyMongoUtils import PyMongoUtils
from classes.NewsQuery import NewsQuery

In [2]:
# Connection strings carry the database username and password, so they are read
# from the environment instead of being written into the notebook (where they
# would be committed and shared together with the saved outputs).
#   MONGODB_URI         required; e.g. the Atlas "mongodb+srv://..." string
#   MONGODB_URI_DIRECT  optional; non-SRV "mongodb://host1,host2,..." form,
#                       preferred for the connection when it is set
# NOTE(review): the credentials that used to be hardcoded in this cell have been
# exposed in plain text and should be rotated.
uri = os.environ["MONGODB_URI"]  # a KeyError here means the variable is not set
uri2 = os.environ.get("MONGODB_URI_DIRECT", uri)
mongo = PyMongoUtils(uri2)
mongo.ping()

Pinged your deployment. Successfully connected to MongoDB!


In [3]:
# Handle to the collection every query below runs against.
# NOTE(review): the two positional arguments are presumably (database name,
# collection name) — confirm against PyMongoUtils.get_collection.
collection = mongo.get_collection("News", "AnalysedResult")

In [4]:
# Wrap the collection in the project's NewsQuery helper; every query and report
# cell below is a method call on `nq`.
nq = NewsQuery(collection)

# Query

### Count how many articles have a positive label and mention [location] in either the title or summary (case insensitive)

In [5]:
# The location is passed in lower case; the section heading describes the
# title/summary match as case insensitive.
nq.count_location_positive("kuala lumpur")

32

### Find top n most recent articles where location is set and predicted_label is either negative or neutral

In [6]:
# 3 is the "n" from the section heading: how many of the most recent articles to return.
nq.get_recent_by_location_and_label(3)

[{'_id': ObjectId('67f7ebceb2d3f5bfad017f51'),
  'title': 'Family of seven stranded at Lata Batu Hampar falls due to water surge',
  'summary': 'ipoh a family of seven were trapped at the lata batu hampar waterfall in kuala kangsar due to a headwater surge',
  'publish_time': '2025-03-31 20:44:00',
  'section': 'News',
  'sub_section': 'Nation',
  'keywords': [],
  'location': 'IPOH',
  'predicted_label': 'negative'},
 {'_id': ObjectId('67f7ebceb2d3f5bfad017e9c'),
  'title': 'Myanmar quake: SMART focuses on rescuing victims in places of worship',
  'summary': 'kuala lumpur the special malaysia disaster assistance and rescue team smart is focusing its efforts on locating victims trapped in places of worship affected by the earthquake disaster in myanmars sagaing district',
  'publish_time': '2025-03-31 19:59:00',
  'section': 'News',
  'sub_section': 'Nation',
  'keywords': [],
  'location': 'KUALA LUMPUR',
  'predicted_label': 'negative'},
 {'_id': ObjectId('67f7ebceb2d3f5bfad017ece'),

### Aggregate number of articles grouped by sub_section and count how many have empty keywords

In [7]:
# In the saved output each row has the sub_section in `_id` and the number of
# articles with empty keywords in `empty_keywords_count`.
nq.count_empty_keywords_by_sub_section()

[{'_id': 'Nation', 'empty_keywords_count': 190},
 {'_id': 'Aseanplus News', 'empty_keywords_count': 7},
 {'_id': 'World', 'empty_keywords_count': 2},
 {'_id': 'Business News', 'empty_keywords_count': 1}]

### Find the average number of articles per day in a given date range

In [8]:
# Date window for the per-day average computed in the next cell.
# NOTE(review): whether end_date is treated as inclusive or exclusive is decided
# inside NewsQuery.average_articles_per_day, which is not visible here — confirm.
start_date = datetime(year=2025, month=3, day=1)
end_date = datetime(year=2025, month=4, day=1)

In [9]:
# Uses the start_date / end_date window defined in the previous cell.
nq.average_articles_per_day(start_date, end_date)

[{'_id': None, 'average_articles_per_day': 10.0}]

### Group articles by predicted_label and find the latest article publish time and total count per label

In [10]:
# In the saved output each row has the label in `_id`, a `count`, and a
# `latest_article` value that appears to be a publish_time-style timestamp string.
nq.group_by_label_latest()

[{'_id': 'positive', 'count': 90, 'latest_article': '2025-03-31 19:11:00'},
 {'_id': 'negative', 'count': 70, 'latest_article': '2025-03-31 20:44:00'},
 {'_id': 'neutral', 'count': 40, 'latest_article': '2025-03-31 16:59:00'}]

### Top n locations with the most articles having a positive sentiment


In [11]:
# 3 is the "n" from the section heading: how many top locations to return.
# Note the saved output includes the placeholder value 'Location not found' as a location.
nq.top_locations_with_positive(3)

[{'_id': 'KUALA LUMPUR', 'count': 33},
 {'_id': 'PETALING JAYA', 'count': 12},
 {'_id': 'Location not found', 'count': 6}]

### Retrieve articles where the title or summary contains both "communist" and "ideo" (a word stem, e.g. "ideology"), matching case-insensitively

In [12]:
# Search terms for the keyword query in the next cell.
keywords = [
    "communist",
    "ideo",  # stem: the saved result matches on "ideology" / "ideologies"
]

In [13]:
# Passes the keyword list defined in the previous cell.
nq.search_articles_with_keywords(keywords)

[{'_id': ObjectId('67f7ebceb2d3f5bfad017e98'),
  'title': 'Cops probing video, organisation suspected of promoting communist ideology, says IGP',
  'summary': 'kuala lumpur police are investigating a viral video clip depicting an unregistered organisation allegedly promoting communist ideologies in the country says tan sri razarudin husain',
  'publish_time': '2025-03-26 23:20:00',
  'section': 'News',
  'sub_section': 'Nation',
  'keywords': [],
  'location': 'KUALA LUMPUR',
  'predicted_label': 'positive'}]

# Report

### Monthly Sentiment Distribution Report

In [14]:
# In the saved output each row is keyed by (year, month, sentiment) with a `count`.
nq.monthly_sentiment_distribution()

[{'_id': {'year': 2025, 'month': 3, 'sentiment': 'positive'}, 'count': 90},
 {'_id': {'year': 2025, 'month': 3, 'sentiment': 'negative'}, 'count': 70},
 {'_id': {'year': 2025, 'month': 3, 'sentiment': 'neutral'}, 'count': 40}]

### Location-wise Article Activity Heatmap

In [15]:
# In the saved output there is one row per location, with a `total` and a
# per-label `sentiments` breakdown (label, count).
nq.location_sentiment_heatmap()

[{'_id': 'KUALA LUMPUR',
  'total': 62,
  'sentiments': [{'label': 'negative', 'count': 20},
   {'label': 'positive', 'count': 33},
   {'label': 'neutral', 'count': 9}]},
 {'_id': 'PETALING JAYA',
  'total': 18,
  'sentiments': [{'label': 'negative', 'count': 2},
   {'label': 'neutral', 'count': 4},
   {'label': 'positive', 'count': 12}]},
 {'_id': 'KOTA KINABALU',
  'total': 12,
  'sentiments': [{'label': 'positive', 'count': 4},
   {'label': 'negative', 'count': 6},
   {'label': 'neutral', 'count': 2}]},
 {'_id': 'Location not found',
  'total': 10,
  'sentiments': [{'label': 'negative', 'count': 3},
   {'label': 'positive', 'count': 6},
   {'label': 'neutral', 'count': 1}]},
 {'_id': 'IPOH',
  'total': 8,
  'sentiments': [{'label': 'negative', 'count': 3},
   {'label': 'neutral', 'count': 5}]},
 {'_id': 'JOHOR BARU',
  'total': 8,
  'sentiments': [{'label': 'negative', 'count': 2},
   {'label': 'positive', 'count': 3},
   {'label': 'neutral', 'count': 3}]},
 {'_id': 'SHAH ALAM',
  '

### Peak Publishing Hours Report

In [16]:
# NOTE(review): `_id` in the saved output is presumably the hour of day derived
# from publish_time — confirm in NewsQuery; time zone handling is not visible here.
nq.publishing_peak_hours()

[{'_id': 16, 'count': 3},
 {'_id': 17, 'count': 1},
 {'_id': 18, 'count': 2},
 {'_id': 19, 'count': 14},
 {'_id': 20, 'count': 37},
 {'_id': 21, 'count': 60},
 {'_id': 22, 'count': 51},
 {'_id': 23, 'count': 32}]

### Articles with Potential Low Content Quality

In [17]:
# The criteria for "low content quality" live inside NewsQuery and are not visible
# here; the saved output reports `summary_length` and `has_keywords` per article.
nq.low_content_quality_articles()

[{'_id': ObjectId('67f7ebceb2d3f5bfad017ec4'),
  'title': "Labourer who attempted to evade arrest gets eight month's jail, RM6,000 fine",
  'summary': 'labourer who attempted to evade arrest gets eight months jail rm fine',
  'summary_length': 69,
  'has_keywords': False},
 {'_id': ObjectId('67f7ebceb2d3f5bfad017ead'),
  'title': 'Over 1,000 attend Iftar event with PM in Tampin',
  'summary': 'tampin more than  attendees enlivened the iftar event with prime minister datuk seri anwar ibrahim',
  'summary_length': 98,
  'has_keywords': False},
 {'_id': ObjectId('67f7ebceb2d3f5bfad017f24'),
  'title': 'Melaka named official venue for Visit Malaysia Year 2026 launch',
  'summary': 'melaka historic city is designated as the official venue for the launch of visit malaysia year  vmy',
  'summary_length': 99,
  'has_keywords': False},
 {'_id': ObjectId('67f7ebceb2d3f5bfad017f44'),
  'title': 'Two men slashed to death in a fight at Taman Sentosa',
  'summary': 'klang two men were killed after b