In [1]:
import os
import sys
import pandas as pd
import argparse
from datetime import datetime, timedelta
import pathlib

# Added: put the parent of the current working directory on sys.path
# (presumably the Django project root, so its packages can be imported — confirm).
parent_path = pathlib.Path().absolute().parent
sys.path.insert(0, str(parent_path))
# Set up the Django environment: pick the settings module (only if the
# variable is not already set), then initialise Django.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'website_configs.settings')
import django
django.setup()
# Important: set this environment variable to allow synchronous operations
# inside Jupyter's asynchronous environment.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Now we can import Django models
from app_user_keyword_association_db.models import NewsData

In [2]:
from django.db.models import Q, Max, F
from collections import Counter

In [3]:
def filter_database_fullText(user_keywords, cond, cate, weeks):
    """Return recent NewsData rows whose ``content`` matches the keywords.

    The search window ends at the newest ``date`` stored in the table and
    starts ``weeks`` weeks before it (both ends inclusive). All filtering is
    expressed as database queries rather than pandas operations.

    Args:
        user_keywords: Iterable of substrings to look for in the ``content``
            column (matched with ``content__contains``).
        cond: ``'and'`` to require every keyword, ``'or'`` to accept any of
            them. Any other value leaves the rows unfiltered by keyword.
        cate: Category to restrict to, or ``"全部"`` to keep every category.
        weeks: Length of the look-back window in weeks (passed to
            ``timedelta(weeks=...)``).

    Returns:
        A NewsData QuerySet with the filters applied. When the table has no
        dated rows an empty QuerySet is returned instead of failing.
    """
    # Newest date in the table; the aggregate yields None when there is
    # nothing to take a maximum of.
    latest_date = NewsData.objects.aggregate(max_date=Max('date'))['max_date']
    if latest_date is None:
        # Without this guard ``None - timedelta`` raises TypeError.
        return NewsData.objects.none()

    # Window start, counted back from the newest date
    start_date = latest_date - timedelta(weeks=weeks)

    # Base query: restrict to the date window
    queryset = NewsData.objects.filter(date__gte=start_date, date__lte=latest_date)

    # "全部" is the sentinel meaning "do not filter by category"
    if cate != "全部":
        queryset = queryset.filter(category=cate)

    if cond == 'and':
        # Chained filters are ANDed together: every keyword must appear
        for kw in user_keywords:
            queryset = queryset.filter(content__contains=kw)
    elif cond == 'or':
        # OR the per-keyword conditions into a single Q expression
        q_objects = Q()
        for kw in user_keywords:
            q_objects |= Q(content__contains=kw)
        queryset = queryset.filter(q_objects)

    return queryset

In [4]:

# Demo search parameters
weeks = 4  # size of the look-back window, in weeks
cate = '全部'  # a category name, or "全部" for every category
cond = 'and'  # keyword combination mode: 'and' or 'or'
user_keywords = ['烏克蘭', '俄羅斯']  # keywords to look for

queryset = filter_database_fullText(
    user_keywords=user_keywords,
    cond=cond,
    cate=cate,
    weeks=weeks,
)

In [9]:
# Print each matching headline. Only the ``title`` column is requested, so the
# other columns (including the ``content`` text) are not loaded just to list titles.
for title in queryset.values_list('title', flat=True):
    print(title)

俄侵烏克蘭全球憤慨 臉書放寬規定讓人抒發怒火
Apple新品一次看 iPhone SE售1萬3900元起iPad Air有5色[影]
北京冬季帕運閉幕 帕委會主席談和平與希望
俄烏談判露曙光 油價下滑5%
華碩停止對俄羅斯出貨 捐款3000萬賑濟烏克蘭
油價若續漲 朱澤民：今年CPI有可能超過2%
澳洲擴大制裁俄羅斯 歐盟要凍結切爾西老闆資產
俄羅斯提核協議新要求 伊朗外長將赴莫斯科討論
2022酷寒演習展開 3萬北約兵力集結挪威
俄國遭制裁降價求售石油和商品 印度考慮採購
烏克蘭戰事中國疫情添不安 亞股多數收黑
路透社：美中高層已在羅馬會晤
借鑑烏克蘭核電廠遭攻 日研議核廠設專屬警備隊
烏俄進行第4輪談判 烏克蘭代表稱雙方溝通困難
國際博物館協會發聲拒絕戰爭 吳思瑤籲故宮跟進
戰爭時文物如何疏散  故宮3個月內擬對策7月推演
香港恆指暴跌千點 失守2萬點創6年新低


In [None]:
# Project the rows down to the display fields, then keep the first 3
display_fields = queryset.values(
    'category',
    'title',
    'link',
    'photo_link',
)
news_items = display_fields[:3]

In [8]:
# Show the selected rows; the cell output is the QuerySet's repr.
news_items

<QuerySet [{'category': '科技', 'title': '俄侵烏克蘭全球憤慨 臉書放寬規定讓人抒發怒火', 'link': 'https://www.cna.com.tw/news/ait/202203110088.aspx', 'photo_link': 'https://imgcdn.cna.com.tw/www/WebPhotos/200/20220311/2000x1391_0522240424184.jpg'}, {'category': '科技', 'title': 'Apple新品一次看 iPhone SE售1萬3900元起iPad Air有5色[影]', 'link': 'https://www.cna.com.tw/news/ait/202203090006.aspx', 'photo_link': 'https://imgcdn.cna.com.tw/www/webphotos/WebCover/420/20220309/800x600_644221551345.jpg'}, {'category': '運動', 'title': '北京冬季帕運閉幕 帕委會主席談和平與希望', 'link': 'https://www.cna.com.tw/news/aspt/202203130215.aspx', 'photo_link': None}]>

In [7]:
# Pull the ``date`` column as bare values (flat=True), then materialise it as a list
date_column = queryset.values_list('date', flat=True)
dates = list(date_column)