<a href="https://colab.research.google.com/github/yin-june/Zadaqah-charity-app/blob/main/FundAllocationProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Zakat Pool + AI Fund Allocation**
Responsibility : Build pooled donation screen, AI-based fund allocator, DAO logic

**Features**:
* AI-powered allocation suggestion engine (ranking model)
* DAO voting for fund distribution
* Voting weights based on contribution amount or token model

**Goal**
* Pool Zakat funds from many donors.
* Use AI to recommend and optimize how the funds should be distributed (based on urgency, need, location, etc.).
* Let donors vote (DAO-style) on how funds are finally distributed — transparently, fairly, and Shariah-compliantly.



# Extract NGO listings
* web-scraped from Hati.my: https://www.hati.my/organisations/

In [None]:
pip install requests beautifulsoup4




In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import requests
from bs4 import BeautifulSoup
import time
import re
import json

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
}

base_url = 'https://www.hati.my'
all_ngos = []  # Store all scraped NGOs here

# Get all category URLs
def get_all_categories():
    url = f'{base_url}/organisations/'
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    category_links = []
    category_section = soup.find('div', class_='container charities')

    if category_section:
        for a in category_section.find_all('a', href=True):
            href = a['href']
            if '/category/' in href:
                name = a.text.strip()
                category_links.append((name, href))
    return category_links

def decode_cf_email(encoded_str):
    r = int(encoded_str[:2], 16)
    email = ''.join(
        chr(int(encoded_str[i:i+2], 16) ^ r)
        for i in range(2, len(encoded_str), 2)
    )
    return email

def extract_details(detail_url):
    response = requests.get(detail_url, headers=headers)
    soup = BeautifulSoup(response.text, 'html.parser')

    email = 'Not found'
    address = 'Not found'
    website = 'Not found'

    # Extract email from <a class="my_link">
    links = soup.find_all('a', class_='my_link')
    #print("🔍 Found links:", [a['href'] for a in soup.find_all('a', class_='my_link') if 'href' in a.attrs])
    for link in links:
        href = link.get('href', '')
        if '/cdn-cgi/l/email-protection#' in href:
            encoded = href.split('#')[-1]
            email = decode_cf_email(encoded)

        # Check for a valid website URL
        elif href.startswith('http') and 'mailto:' not in href:
            website = href

    # Extract address from <td class="tooltip">
    tooltips = soup.find_all('td', class_='tooltip1')
    for td in tooltips:
        text = td.get_text(separator=' ').strip()
        if any(kw in text.lower() for kw in ['jalan', 'lorong', 'taman', 'lot', 'block', 'persiaran', 'kuala', 'selangor', 'penang', 'johor']) and len(text) > 15:
            address = text
            break
    return email, address, website

def is_zakat_eligible(description, category):
    zakat_keywords = ['asnaf', 'poor', 'needy', 'orphans', 'refugees', 'islam', 'hunger', 'medical', 'health', 'disaster', 'relief', 'b40', 'miskin']
    zakat_categories = ['Children', 'Refugees', 'Health', 'Orang Asli', 'Differently Abled', 'Senior Citizens']

    keyword_match = any(kw in description.lower() for kw in zakat_keywords)
    category_match = any(cat.lower() in category.lower() for cat in zakat_categories)

    return keyword_match or category_match

# scrape each category's NGOs
def scrape_category(category_url, category_name):
    page = 1
    while True:
        url = f"{category_url}page/{page}/"
        print(f"Scraping: {url}")
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print(f"Request failed: {response.status_code}")
            break

        soup = BeautifulSoup(response.text, 'html.parser')
        ngo_cards = soup.find_all('div', class_='news-content')
        if not ngo_cards:
            print("No more listings found.")
            break

        for card in ngo_cards:
            name_tag = card.find('h2')
            if name_tag and name_tag.a:
                name = name_tag.a.text.strip()
                link = name_tag.a['href']
            else:
                continue

            desc_tag = card.find('p', class_="content")
            description = desc_tag.text.strip() if desc_tag else "No description"

            email, address, website = extract_details(link)

            zakat_result = is_zakat_eligible(description, category_name)
            print(f"Zakat Eligible: {zakat_result}")

            ngo_data = {
                "name": name,
                "category": category_name,
                "description": description,
                "email": email,
                "website": website,
                "address": address,
                "detail_url": link,
                "zakat_eligible": zakat_result
            }

            all_ngos.append(ngo_data)
            print(f"✅ {name} ({category_name})")

            time.sleep(0.5)

        page += 1
        time.sleep(1)

# Run the full scrape
category_links = get_all_categories()
print(f"\n📂 Found {len(category_links)} categories")

for category_name, category_url in category_links:
    scrape_category(category_url, category_name)

# Save to JSON
with open('/content/drive/MyDrive/ngos_malaysia.json', 'w', encoding='utf-8') as f:
    json.dump(all_ngos, f, ensure_ascii=False, indent=2)

print(f"\n🎉 Done! {len(all_ngos)} NGOs saved to ngos_malaysia.json")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Zakat Eligible: False
✅ Persatuan Gasing Pangkah Negeri Kelantan (PEGAPAN) (Culture)
Zakat Eligible: False
✅ Persatuan Gabungan Silat Negeri Kelantan (Culture)
Zakat Eligible: False
✅ Persatuan Seni Silat Gayung Fatani Malaysia Cawangan Kelantan (Culture)
Zakat Eligible: False
✅ Persatuan Seni Silat Cekak Ustaz Hanafi Malaysia Cawangan Kelantan (Culture)
Zakat Eligible: False
✅ Persatuan Seni Silat Terlak Tunggal (Culture)
Zakat Eligible: False
✅ Persatuan Pembatik Melayu Pantai Timur (Culture)
Zakat Eligible: False
✅ Persatuan Seni Purba & Kraftangan Negeri Kelantan (Culture)
Zakat Eligible: False
✅ Persatuan Seni Foto Kelantan (Culture)
Zakat Eligible: False
✅ Persatuan Seni Lukis Kelantan (PESENI) (Culture)
Zakat Eligible: False
✅ Persatuan Penulis Tumpat (PESISIR) (Culture)
Zakat Eligible: False
✅ Pertubuhan Jalinan Anak Seni Teater, Tari dan Sastera Kelantan ( JASTERA ) (Culture)
Zakat Eligible: False
✅ Persatuan Ang

# Poverty by State Data Collection
* data from OpenDOSM: https://open.dosm.gov.my/data-catalogue/hh_poverty_state

In [3]:
import pandas as pd
URL_DATA = 'https://storage.dosm.gov.my/hies/hh_poverty_state.parquet'
# Read the Parquet file
poverty_df = pd.read_parquet(URL_DATA)

# Save to CSV
poverty_df.to_csv("/content/drive/MyDrive/datasets/malaysia_poverty_by_state.csv", index=False)
poverty_df.head()

Unnamed: 0,state,date,poverty_absolute,poverty_hardcore,poverty_relative
0,Johor,1970-01-01,45.7,,
1,Johor,1976-01-01,29.0,,
2,Johor,1979-01-01,18.2,,
3,Johor,1984-01-01,12.2,3.1,
4,Johor,1987-01-01,11.1,2.6,


In [4]:
# get the latest date for each state. also display poverty_absolute

# Group by state and get the latest date
latest_dates = poverty_df.groupby('state')['date'].max()

# Merge the latest dates back into the original DataFrame
merged_df = poverty_df.merge(latest_dates.rename('latest_date'), left_on=['state', 'date'], right_on=['state', 'latest_date'], how='inner')

# Select the desired columns and display the result
result = merged_df[['state', 'latest_date', 'poverty_absolute']]
result



Unnamed: 0,state,latest_date,poverty_absolute
0,Johor,2022-01-01,4.6
1,Kedah,2022-01-01,9.0
2,Kelantan,2022-01-01,13.2
3,Melaka,2022-01-01,4.2
4,Negeri Sembilan,2022-01-01,4.4
5,Pahang,2022-01-01,6.3
6,Perak,2022-01-01,7.5
7,Perlis,2022-01-01,4.0
8,Pulau Pinang,2022-01-01,2.0
9,Sabah,2022-01-01,19.7


# Regression Model to Rank Causes by Predicted Impact

In [8]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

# Load datasets
from google.colab import drive
#drive.mount('/content/drive')
df = pd.read_csv('/content/drive/MyDrive/datasets/charity_data.csv')

# Simulate last_verified_date within the past 2 years (0 to 730 days ago)
np.random.seed(42)
df["last_verified_date"] = [
    datetime.today() - timedelta(days=int(days))
    for days in np.random.uniform(0, 730, size=len(df))
]

# Calculate days since last verification
df["days_since_verified"] = (datetime.today() - df["last_verified_date"]).dt.days

# Apply exponential time decay: decay faster for older causes
decay_rate = 0.01  # Tune this for stronger/weaker decay
df["time_decay_weight"] = np.exp(-decay_rate * df["days_since_verified"])

# Target
y = df["score"]

# Feature selection
numeric_features = [
    "ascore", "fscore", "fund_eff",
    "admin_exp_p", "program_exp_p", "tot_exp",
    "tot_rev"
]

categorical_features = ["category", "subcategory"]

# Pipelines for preprocessing
numeric_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="mean")),  # Fill NaNs with mean
    ("scaler", StandardScaler())
])

categorical_transformer = Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),  # Fill NaNs with mode
    ("encoder", OneHotEncoder(handle_unknown="ignore"))
])

# Full preprocessing
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features)
    ]
)

# Final model pipeline
model = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=100, random_state=42))
])

# Train/test split
X = df[numeric_features + categorical_features]
X_train, X_test, y_train, y_test, decay_train, decay_test = train_test_split(
    X, y, df["time_decay_weight"], test_size=0.2, random_state=42
)

# Train model
model.fit(X_train, y_train)
print("Model training complete!")

# Predict impact scores
preds = model.predict(X_test)

# Now apply decay
adjusted_score = preds * decay_test.values


Model training complete!


Evaluate the model

In [9]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("📉 RMSE:", rmse)
print("📈 R² score:", r2_score(y_test, y_pred))

📉 RMSE: 0.20978251886173938
📈 R² score: 0.9992535493252904


# Compute Impact Score

In [13]:
import json
from sklearn.ensemble import RandomForestRegressor  # or your loaded model
import random

# Load the full list of NGO records
with open('/content/drive/MyDrive/ngos_malaysia.json', 'r', encoding='utf-8') as f:
    ngo_list = json.load(f)
    # ngo=ngo_list[500]

# Map Malaysian NGO categories to the model’s expected ones
def map_category(description):
    desc = description.lower()
    if any(word in desc for word in ['climate', 'conservation', 'marine', 'green', 'reef']):
        return 'Environment'
    if any(word in desc for word in ['culture', 'heritage', 'arts', 'film']):
        return 'Arts, Culture, Humanities'
    if any(word in desc for word in ['mosque', 'islam', 'church', 'religious', 'dakwah']):
        return 'Religion'
    if any(word in desc for word in ['child', 'orphan', 'shelter', 'welfare', 'homeless', 'poverty']):
        return 'Human Services'
    if any(word in desc for word in ['school', 'education', 'student', 'learning']):
        return 'Education'
    if 'animal' in desc or 'pet' in desc:
        return 'Animals'
    if any(word in desc for word in ['refugee', 'foreign', 'rohingya', 'humanitarian aid']):
        return 'International'
    if any(word in desc for word in ['cancer', 'medical', 'hospital', 'health']):
        return 'Health'
    if any(word in desc for word in ['community', 'urban poor', 'b40', 'grassroots']):
        return 'Community Development'
    if any(word in desc for word in ['rights', 'equality', 'lgbtq', 'discrimination']):
        return 'Human and Civil Rights'
    if any(word in desc for word in ['policy', 'research', 'institute']):
        return 'Research and Public Policy'
    return 'Community Development'  # default fallback

def map_subcategory(description):
    desc = description.lower()

    mapping = [
        ("Environmental Protection and Conservation", ['climate', 'environment', 'pollution', 'deforestation', 'conservation']),
        ("Wildlife Conservation", ['wildlife', 'animal protection', 'endangered']),
        ("Animal Rights, Welfare, and Services", ['animal', 'rescue', 'stray']),
        ("Performing Arts", ['dance', 'theatre', 'performing', 'music', 'arts']),
        ("Museums", ['museum', 'exhibit']),
        ("Libraries, Historical Societies and Landmark Preservation", ['library', 'preservation', 'archive']),
        ("Social Services", ['social work', 'rehabilitation', 'support service']),
        ("Children's and Family Services", ['child', 'orphan', 'foster', 'family', 'youth']),
        ("Youth Development, Shelter, and Crisis Services", ['teen', 'youth shelter', 'drop-in centre']),
        ("Early Childhood Programs and Services", ['early education', 'preschool', 'kindergarten']),
        ("Special Education", ['special needs', 'inclusive education']),
        ("Youth Education Programs and Services", ['school', 'tutor', 'after school']),
        ("Adult Education Programs and Services", ['adult literacy', 'skills training']),
        ("Education Policy and Reform", ['curriculum reform', 'education policy']),
        ("Scholarship and Financial Support", ['scholarship', 'bursary', 'tuition help']),
        ("Religious Activities", ['mosque', 'church', 'dakwah', 'religious']),
        ("Religious Media and Broadcasting", ['religious tv', 'islamic radio', 'dakwah media']),
        ("Food Banks, Food Pantries, and Food Distribution", ['food bank', 'hunger', 'free meals']),
        ("Homeless Services", ['homeless', 'shelter', 'street outreach']),
        ("Medical Research", ['medical research', 'clinical trials']),
        ("Diseases, Disorders, and Disciplines", ['cancer', 'diabetes', 'hiv', 'mental health']),
        ("Patient and Family Support", ['caregiver support', 'patient services']),
        ("Treatment and Prevention Services", ['therapy', 'substance abuse', 'rehab']),
        ("Humanitarian Relief Supplies", ['refugees', 'war', 'disaster aid']),
        ("Development and Relief Services", ['poverty', 'disaster relief', 'international aid']),
        ("Advocacy and Education", ['advocacy', 'campaign', 'rights', 'public awareness']),
        ("Social and Public Policy Research", ['policy reform', 'research institute']),
        ("Housing and Neighborhood Development", ['housing', 'affordable home', 'community housing']),
        ("Community Foundations", ['community org', 'grassroots']),
        ("Public Broadcasting and Media", ['community radio', 'public tv']),
        ("International Peace, Security, and Affairs", ['peace building', 'security']),
        ("Non-Medical Science & Technology Research", ['tech research', 'innovation']),
    ]

    for subcat, keywords in mapping:
        if any(kw in desc for kw in keywords):
            return subcat

    return "Multipurpose Human Service Organizations"  # safe fallback

def map_state(state):
  st = state.lower()
  if 'kuala lumpur' in st:
    return 'W.P. Kuala Lumpur'
  elif 'putrajaya' in st:
    return 'W.P. Putrajaya'
  elif 'labuan' in st:
    return 'W.P. Labuan'
  elif 'penang' in st:
    return 'Pulau Pinang'
  else:
    return st

def extract_state(address_or_name):
    states = [
        "Selangor", "Kuala Lumpur", "Johor", "Penang", "Pulau Pinang", "Perak", "Kelantan", "Terengganu",
        "Negeri Sembilan", "Melaka", "Pahang", "Sabah", "Sarawak", "Putrajaya", "Labuan", "Kedah", "Perlis"
    ]
    text = address_or_name.lower()

    for state in states:
        if state.lower() in text:
            return state
    return "Selagor" # fallback

def get_zakat_weight(ngo):
    return 1.2 if ngo.get("zakat_eligible", False) else 1.0

def generate_mock_votes(ngo_list, min_votes=0, max_votes=50):
    return {
        ngo["name"]: random.randint(min_votes, max_votes)
        for ngo in ngo_list
    }

def normalize_votes(votes_dict):
    max_votes = max(votes_dict.values()) if votes_dict else 1
    return {ngo: votes / max_votes for ngo, votes in votes_dict.items()}

def engineer_features(ngo):
    description = ngo.get("description", "")
    category = map_category(description)

    # using mock data
    features = {
        "ascore": random.randint(20, 100),
        "fscore": random.randint(20, 100),
        "fund_eff": random.uniform(0.0, 0.80),
        "admin_exp_p": random.uniform(0.0, 0.25),
        "program_exp_p": random.uniform(0.0, 1.0),
        "tot_exp": 100000,  # dummy RM amount
        "tot_rev": 120000,
        "category": category,
        "subcategory": map_subcategory(description),
    }
    return features

def compute_final_score(ngo, model, poverty_df, normalized_votes, votes_by_ngo):
    # Feature engineering
    features = engineer_features(ngo)

    # Predict impact
    X = pd.DataFrame([features])
    model_score = model.predict(X)[0]

    # Get state from address
    addr = ngo.get("address", "")
    name = ngo.get("name", "")
    malaysia_state = extract_state(addr + " " + name)
    msia_state = map_state(malaysia_state)

    # Get poverty weight
    latest_dates = poverty_df.groupby('state')['date'].max()
    merged_df = poverty_df.merge(latest_dates.rename('latest_date'), left_on=['state', 'date'], right_on=['state', 'latest_date'], how='inner')
    row = merged_df[merged_df['state'].str.lower() == msia_state.lower()]
    if not row.empty:
      poverty_weight = row.iloc[0]['poverty_absolute']  # or whatever your column is
    else:
      poverty_weight = 1  # fallback

    # Zakat weight
    zakat_weight = get_zakat_weight(ngo)

    # DAO vote influence (0.0 to 1.0)
    vote_count = votes_by_ngo.get(name, 0)
    dao_vote_score = normalized_votes.get(name, 0)

    # Final score
    final_score = (model_score * 0.4) + (poverty_weight + zakat_weight)*1.2 + (dao_vote_score*100*0.4)
    final_score = min(final_score, 100.0)

    return {
        "name": ngo["name"],
        "model_score": model_score,
        "state": msia_state,
        "poverty_weight": poverty_weight,
        "zakat_weight": zakat_weight,
        "vote_count": vote_count,
        "dao_vote_score": dao_vote_score,
        "final_score": final_score
    }

votes_by_ngo = {
    ngo["name"]: random.randint(0, 50) for ngo in ngo_list  # simulate votes
}
# Normalize them
normalized_votes = normalize_votes(votes_by_ngo)

scored_ngos = [compute_final_score(ngo, model, poverty_df, normalized_votes, votes_by_ngo) for ngo in ngo_list]

# Optional: Sort by final score descending
scored_ngos = sorted(scored_ngos, key=lambda x: x['final_score'], reverse=True)

# Print top 5
print("Top 5 NGOs ranked by impact score")
for ngo in scored_ngos[:5]:
    print(ngo)


Top 5 NGOs ranked by impact score
{'name': 'Blue Life', 'model_score': np.float64(92.52909999999996), 'state': 'sabah', 'poverty_weight': np.float64(19.7), 'zakat_weight': 1.0, 'vote_count': 41, 'dao_vote_score': 0.82, 'final_score': np.float64(94.65163999999999)}
{'name': 'Pertubuhan Angkatan Gabungan Rakyat Asli Sabah (AGARAS)', 'model_score': np.float64(68.78359999999996), 'state': 'sabah', 'poverty_weight': np.float64(19.7), 'zakat_weight': 1.2, 'vote_count': 48, 'dao_vote_score': 0.96, 'final_score': np.float64(90.99343999999999)}
{'name': 'Hopes Malaysia', 'model_score': np.float64(60.35729999999998), 'state': 'sabah', 'poverty_weight': np.float64(19.7), 'zakat_weight': 1.2, 'vote_count': 48, 'dao_vote_score': 0.96, 'final_score': np.float64(87.62292)}
{'name': 'Sabah Integrity Alliance Association (SINAR)', 'model_score': np.float64(71.70599999999995), 'state': 'sabah', 'poverty_weight': np.float64(19.7), 'zakat_weight': 1.0, 'vote_count': 41, 'dao_vote_score': 0.82, 'final_scor

In [12]:
df_top = pd.DataFrame(scored_ngos)
# Optional: Display selected columns only
display_columns = ['name', 'state', 'model_score', 'poverty_weight', 'zakat_weight','vote_count','dao_vote_score', 'final_score']
print("Top 10 causes ranked by impact score")
df_top[display_columns].head(10)


Top 10 causes ranked by impact score


Unnamed: 0,name,state,model_score,poverty_weight,zakat_weight,vote_count,dao_vote_score,final_score
0,Taiwan Buddhist Tzu Chi Foundation Malaysia (S...,sabah,98.5906,19.7,1.0,40,0.8,98.1353
1,Sabah Foundation,sabah,95.501,19.7,1.0,37,0.74,94.7905
2,Marine Research Foundation,sabah,96.0002,19.7,1.0,35,0.7,93.8401
3,Persatuan CHILD (Caring and Helping Individual...,sabah,78.8339,19.7,1.0,45,0.9,91.25695
4,Rumah Anak Yatim As-Sakinah,sabah,91.0644,19.7,1.2,33,0.66,90.4122
5,Rumah Anak Yatim Tambunan Sabah,sabah,78.0467,19.7,1.2,42,0.84,89.30335
6,Institute for Indigenous Economic Progress (I...,sabah,82.8741,19.7,1.0,38,0.76,89.07705
7,Taiwan Buddhist Tzu Chi Foundation Malaysia (S...,sabah,79.8267,19.7,1.2,40,0.8,88.99335
8,Sabah Society For The Deaf,sabah,83.1531,19.7,1.2,34,0.68,87.05655
9,Society for the Sabah Heart Fund (SOSHF),sabah,68.6684,19.7,1.2,46,0.92,87.0142
