## [ 專案分享說明 ]

**專案名稱**：FB Contest Intelligence Analyzer (個人 Side Project)

**開發者**：Eason

**原始開發時間**：2020 年

**本次分享調整摘要**：

1. **資訊安全與環境適配 (Security & Portability)**
* 進行去識別化處理，移除所有個人登入憑證與測試路徑。

2. **程式碼導讀與註解強化 (Documentation)**
* 針對核心函式補充註解說明，並同步部分命名結構，以提升第三方閱讀之流暢度。
* 於 GitHub 儲存庫的 requirement.txt 中補齊開發環境依賴說明，便於理解當時之技術選型。

3. **分析成果驗證保留 (Result Showcase)**
* 特意保留 2020 年之原始執行輸出結果，旨在展示「去重缺口分析」於實務競賽中識破異常灌水行為之應用成效。



---

> **備註**：本專案僅供程式架構與分析邏輯評估參考。由於 Facebook 網頁結構時常更新，部分自動化元素定位（XPath/CSS Selector）可能與現行版本不同。


In [None]:
# -*- coding: utf-8 -*-

import pandas as pd
import re, time, requests
from selenium import webdriver
from bs4 import BeautifulSoup
from selenium.webdriver.common.keys import Keys

In [None]:
#---------------config-------------
# Login credentials (placeholders after de-identification).
# They are the default arguments of FB_login, and LOGIN_ID is also the
# substring used to recognise the user's own post among the collected links.
LOGIN_ID = 'your_account@example.com'
LOGIN_PW = 'your_password'

# Contest keyword settings.
# SEARCH_KEY_1 is typed into the search box; a comment is counted by
# unique_cnt_check when its text contains SEARCH_KEY_1 or SEARCH_KEY_2.
SEARCH_KEY_1 = '#雨潔我要抽'
SEARCH_KEY_2 = '5000'

#--------------function--------------
# 進入貼文, 展開留言到最底
def expand(url):
    """Open a post and expand its comment thread as far as possible.

    Loads *url* in the module-level Selenium ``driver``, clicks the English
    language link when one is present, opens the comment list, and then keeps
    clicking every "more comments" / "more replies" / "see more" button until
    a complete pass over the thread finds no such button.

    Args:
        url: Address of the post to open.

    Returns:
        None. The expanded page stays loaded in ``driver`` for the caller to
        parse.
    """
    # Function-scope import keeps this block self-contained; selenium is
    # already a dependency of the file.
    from selenium.common.exceptions import WebDriverException

    post_xpath = '//div[@class="_5pcr userContentWrapper"]'
    expand_label = re.compile('comment|More|Repl')

    def scroll_to_bottom():
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')

    driver.get(url)
    try:
        driver.find_element_by_xpath('//a[@lang="en_US"]').click()
    except WebDriverException:
        print("Now is in EN_US")
    scroll_to_bottom()

    # Click the "comments" counter to open the comment list, then dismiss the
    # call-to-action popup.
    try:
        driver.find_element_by_xpath(post_xpath + '//a[@data-testid="UFI2CommentsCount/root"]').click()
        time.sleep(1)
        scroll_to_bottom()
        time.sleep(1)
        driver.find_element_by_id('expanding_cta_close_button').click()
    except WebDriverException:
        print('There is no comment!')

    rescan = True
    while rescan:
        rescan = False
        buttons = driver.find_elements_by_xpath(
            post_xpath + '//div[@data-testid="UFI2CommentsList/root_depth_0"]//a[@role="button"]')
        for button in buttons:
            try:
                label = button.text
            except WebDriverException:
                # The element was detached by an earlier click; look again on
                # the next pass instead of aborting the whole expansion.
                rescan = True
                continue
            if not expand_label.search(label):
                continue

            scroll_to_bottom()
            time.sleep(0.5)
            try:
                driver.find_element_by_xpath('//div[@style="display: block;"]//a[@id="expanding_cta_close_button"]').click()
            except WebDriverException:
                # Best effort: no popup is showing, nothing to close.
                pass
            try:
                button.click()
            except WebDriverException:
                print('Nothing')
            time.sleep(1)
            # A matching button was seen, so the thread may have grown.
            rescan = True

    # Final popup check.
    scroll_to_bottom()
    time.sleep(1)
    try:
        driver.find_element_by_xpath('//a[@id="expanding_cta_close_button"]').click()
    except WebDriverException:
        print('No popup!')
    print('reply message expanding finish!')


# 爬留言
def crawl_comment(soup):
    """Collect the comments and replies of the post contained in *soup*.

    Args:
        soup: Parsed page (BeautifulSoup object) of an expanded post.

    Returns:
        A DataFrame with the columns ``CommentID``, ``CommentName``,
        ``CommentTime``, ``CommentContent`` and ``Link``: first every comment
        on the post, then every reply to a comment. ``CommentContent`` is
        ``'Sticker'`` when the comment has no text span. ``Link`` is the URL
        currently loaded in the module-level ``driver``. When the post block
        or its comments are missing, the frame is empty but still carries all
        five columns so callers can index them safely.
    """
    columns = ['CommentID', 'CommentName', 'CommentTime', 'CommentContent', 'Link']

    # Post block.
    user_content = soup.find('div', {'class': '_5pcr userContentWrapper'})
    if user_content is None:
        return pd.DataFrame(columns=columns)

    # (data-testid of the comment node, class of its author link):
    # depth 0 = comments on the post, depth 1 = replies to a comment.
    levels = (
        ('UFI2Comment/root_depth_0', '_3mf5 _3mg0'),
        ('UFI2Comment/root_depth_1', '_3mf5 _3mg1'),
    )

    rows = []
    for testid, author_class in levels:
        for node in user_content.findAll('div', {'data-testid': testid}):
            text_span = node.find('span', {'dir': 'ltr'})
            author = node.find('a', {'class': author_class})
            rows.append({
                'CommentID': author.attrs['data-hovercard'].split('id=', 2)[1],
                'CommentName': node.find('img').attrs['alt'],
                'CommentTime': node.find('abbr', {'class': 'livetimestamp'}).attrs['data-tooltip-content'],
                'CommentContent': 'Sticker' if text_span is None else text_span.text,
                'Link': driver.current_url,
            })

    # Build the frame once instead of concatenating one row at a time.
    return pd.DataFrame(rows, columns=columns)

# 登入控制
def FB_login(fb_id=LOGIN_ID, fb_pw=LOGIN_PW):
    """Submit the Facebook login form through the module-level ``driver``.

    Args:
        fb_id: Account identifier typed into the ``email`` field.
        fb_pw: Password typed into the ``pass`` field.

    Returns:
        None. Prints ``'logging ok!'`` once the form has been submitted (this
        does not verify that the login was accepted) or ``'logging error!'``
        when the browser could not load or drive the form.
    """
    # Function-scope import keeps this block self-contained; selenium is
    # already a dependency of the file.
    from selenium.common.exceptions import WebDriverException

    try:
        # HTTPS so the login form is never requested over plain text.
        driver.get("https://en-gb.facebook.com/")
        driver.find_element_by_id('email').send_keys(fb_id)
        driver.find_element_by_id('pass').send_keys(fb_pw)
        driver.find_element_by_id('loginbutton').click()
        print('logging ok!')
    except WebDriverException:
        print('logging error!')

# 競爭者狀態查詢
def _comment_link_rows(soup):
    """Return one ``{'CommentLink', 'CommentCnt'}`` dict per comment-count link in *soup*."""
    rows = []
    for anchor in soup.findAll('a', {'class': '_3hg- _42ft'}):
        try:
            comment_link = anchor.attrs['href']
            comment_cnt = int(anchor.text.split('則留言')[0].replace(',', ''))
        except (KeyError, ValueError):
            # No href or no parsable count: keep a placeholder row.
            comment_link = ''
            comment_cnt = 0
        rows.append({'CommentLink': comment_link, 'CommentCnt': comment_cnt})
    return rows


def competitors_status_summary(n=0):
    """Search for the contest keyword and tabulate each post's comment count.

    Types ``SEARCH_KEY_1`` into the search box of the module-level ``driver``,
    reads the comment-count links from the "public posts" filter, then reads
    them again from the "you" filter because the public filter does not list
    the user's own post.

    Args:
        n: Minimum ``gap`` (post's comment count minus the user's own) a row
            needs to be kept. The default 0 keeps the user's own row.

    Returns:
        A DataFrame with the columns ``CommentLink``, ``CommentCnt``,
        ``my_commentCnt`` and ``gap``, limited to rows with ``gap >= n`` and
        sorted by ``CommentCnt`` ascending.

    Raises:
        IndexError: If no collected link contains ``LOGIN_ID``.
    """
    # Search with the globally configured SEARCH_KEY_1.
    time.sleep(2)
    search_box = driver.find_element_by_name('q')
    time.sleep(0.5)
    search_box.send_keys(SEARCH_KEY_1)
    time.sleep(0.5)
    search_box.send_keys(Keys.RETURN)
    time.sleep(2)

    # Post source = public posts (XPath kept as it was at the time).
    driver.find_element_by_xpath("//div[@role='group']/a[5]/label/span").click()

    time.sleep(1)
    soup = BeautifulSoup(driver.page_source)
    # Keep scrolling until the end-of-results footer has been rendered.
    while not soup.find('div', {'id': 'browse_end_of_results_footer'}):
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight);')
        time.sleep(0.5)
        soup = BeautifulSoup(driver.page_source)

    # Comment counts of all public competitors.
    rows = _comment_link_rows(soup)

    # The public filter misses the user's own post, so switch to the "you"
    # filter and collect again.
    driver.execute_script("var q=document.documentElement.scrollTop=0")
    time.sleep(0.5)
    driver.find_element_by_xpath("//div[@role='group']/a[2]/label/span").click()
    time.sleep(1)
    rows.extend(_comment_link_rows(BeautifulSoup(driver.page_source)))

    # Build the table once instead of concatenating one row at a time.
    competitors_status = pd.DataFrame(rows, columns=['CommentLink', 'CommentCnt'])

    # Look up the user's own comment count. LOGIN_ID is matched literally so
    # that characters such as '.' are not interpreted as regex syntax.
    own_posts = competitors_status[
        competitors_status['CommentLink'].str.contains(LOGIN_ID, regex=False)]
    if own_posts.empty:
        raise IndexError('no collected post link contains LOGIN_ID')
    my_comment_cnt = int(own_posts['CommentCnt'].iloc[-1])
    competitors_status['my_commentCnt'] = my_comment_cnt

    # Compute the gap, filter, sort and return (n=0 keeps the user's own row).
    competitors_status['gap'] = competitors_status['CommentCnt'] - competitors_status['my_commentCnt']
    competitors_status = competitors_status[competitors_status['gap'] >= n].sort_values('CommentCnt', ascending=True)

    return competitors_status

#計算各家有效留言數與GAP 檢查作弊的連結
def unique_cnt_check(competitors_status):
    """Count distinct valid commenters per post and compare with the user's own.

    For every link in ``competitors_status['CommentLink']`` the post is opened
    and expanded, its comments are crawled, and the number of distinct
    ``CommentName`` values whose comment text contains ``SEARCH_KEY_1`` or
    ``SEARCH_KEY_2`` is recorded.

    Args:
        competitors_status: DataFrame with a ``CommentLink`` column. It is
            modified in place: the columns ``distinct_commentCnt``,
            ``my_commentCnt_real`` and ``gap_real`` are added to it.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        IndexError: If no link contains ``LOGIN_ID``.
    """
    # Escape the keywords so they are matched literally inside the regex.
    keyword_pattern = '|'.join(re.escape(key) for key in (SEARCH_KEY_1, SEARCH_KEY_2))

    distinct_list = []
    for link in competitors_status['CommentLink'].tolist():
        if not link:
            # Placeholder row without a URL: nothing to open.
            distinct_list.append(0)
            continue

        time.sleep(3)
        # Switch to the English host; only the first 'www' (the host part) is
        # replaced so the rest of the URL stays intact.
        expand(link.replace('www', 'en-gb', 1))

        # Parse the expanded page and crawl its comments.
        soup = BeautifulSoup(driver.page_source)
        comment = crawl_comment(soup)
        if 'CommentContent' not in comment.columns:
            # No comment was crawled for this post.
            distinct_list.append(0)
            continue

        valid = comment[comment['CommentContent'].str.contains(keyword_pattern)]
        distinct_list.append(valid['CommentName'].unique().shape[0])

    competitors_status['distinct_commentCnt'] = distinct_list

    # LOGIN_ID is matched literally, not as a regular expression.
    own_posts = competitors_status[
        competitors_status['CommentLink'].str.contains(LOGIN_ID, regex=False)]
    if own_posts.empty:
        raise IndexError('no post link contains LOGIN_ID')

    competitors_status['my_commentCnt_real'] = int(own_posts['distinct_commentCnt'].iloc[-1])
    competitors_status['gap_real'] = competitors_status['distinct_commentCnt'] - competitors_status['my_commentCnt_real']
    return competitors_status

In [None]:
#--------------main pipeline1--------------
# Note: This analysis was performed in 2020. UI selectors may have changed.

if __name__ == '__main__':
    # Turn off Chrome's notification prompt.
    chrome_options = webdriver.ChromeOptions()
    prefs = {'profile.default_content_setting_values': {'notifications': 2}}
    chrome_options.add_experimental_option('prefs', prefs)

    driver = webdriver.Chrome('./chromedriver', options=chrome_options)
    try:
        time.sleep(1)
        # Log in.
        FB_login(LOGIN_ID, LOGIN_PW)
        time.sleep(1)
        competitors_status = competitors_status_summary()
        # Drop outliers with too many comments to keep the crawl fast.
        # .copy() makes the result an independent frame, so adding columns to
        # it later does not raise SettingWithCopyWarning.
        competitors_status = competitors_status[competitors_status['CommentCnt'] <= 1000].copy()
    finally:
        # End the logged-in session even when a step above fails.
        driver.quit()

logging ok!


In [None]:
#--------------main pipeline2--------------
if __name__ == '__main__':
    # A fresh browser session; chrome_options comes from main pipeline1.
    driver = webdriver.Chrome('./chromedriver', options=chrome_options)
    try:
        time.sleep(1)
        competitors_status_all = unique_cnt_check(competitors_status)
        # Keep only posts with at least one valid distinct commenter. The
        # filter is applied to the frame returned by unique_cnt_check rather
        # than relying on its in-place side effect on competitors_status.
        competitors_status_all = competitors_status_all[competitors_status_all['distinct_commentCnt'] > 0]
        competitors_status_all.to_excel('./FB_Contest_Analysis_Result.xlsx')
    finally:
        # Close the browser even when crawling or export fails.
        driver.quit()

Now is in EN_US
There is no comment!
No popup!
reply message expanding finish!
Now is in EN_US
There is no comment!
No popup!
reply message expanding finish!
Now is in EN_US
There is no comment!
No popup!
reply message expanding finish!
Now is in EN_US
There is no comment!
No popup!
reply message expanding finish!


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [None]:
#--------------result showcase--------------
# Display the table built by main pipeline2 as the cell output.
competitors_status_all

Unnamed: 0,CommentLink,CommentCnt,my_commentCnt,gap,distinct_commentCnt,my_commentCnt_real,gap_real
9,https://www.facebook.com/uvxy1234/posts/102070...,289,289,0,283,283,0
1,https://www.facebook.com/permalink.php?story_f...,301,289,12,226,283,-57
7,https://www.facebook.com/permalink.php?story_f...,318,289,29,40,283,-243
4,https://www.facebook.com/permalink.php?story_f...,711,289,422,704,283,421
