In [45]:
import json
import pandas as pd

# --- Parsers for each endpoint ---
def parse_ai_keyword_volume(task_result, endpoint):
    """Flatten an AI keyword search-volume task into one row per keyword-month.

    Args:
        task_result: Task dict. Its "result" entries hold "items"; each item is
            read for "keyword", "ai_search_volume" and "ai_monthly_searches".
        endpoint: Endpoint label copied into every output row.

    Returns:
        pandas.DataFrame with columns endpoint, keyword, ai_search_volume,
        year, month and monthly_ai_volume. The frame is empty (no columns)
        when the task carries no monthly data.
    """
    records = []
    # `.get(key) or []` is used instead of `.get(key, [])` because a key that
    # is present with a JSON null loads as None, which `.get(key, [])` would
    # return as-is and the loop would then fail with a TypeError.
    for result in task_result.get("result") or []:
        for item in result.get("items") or []:
            for month_data in item.get("ai_monthly_searches") or []:
                records.append({
                    "endpoint": endpoint,
                    "keyword": item.get("keyword"),
                    "ai_search_volume": item.get("ai_search_volume"),
                    "year": month_data.get("year"),
                    "month": month_data.get("month"),
                    "monthly_ai_volume": month_data.get("ai_search_volume")
                })
    return pd.DataFrame(records)


def parse_serp(task_result, endpoint):
    """Flatten a SERP task into one row per result item.

    Args:
        task_result: Task dict. Each "result" entry supplies the "keyword" and
            a list of "items" from which the ranking fields are read.
        endpoint: Endpoint label copied into every output row.

    Returns:
        pandas.DataFrame with columns endpoint, keyword, domain, rank_absolute,
        rank_group, page, position, title, url and description. Fields missing
        from an item come through as None. The frame is empty (no columns)
        when there are no items.
    """
    records = []
    # `or []` also covers keys that are present but null (None), which
    # `.get(key, [])` would hand straight to the for-loop.
    for result in task_result.get("result") or []:
        for item in result.get("items") or []:
            records.append({
                "endpoint": endpoint,
                "keyword": result.get("keyword"),
                "domain": item.get("domain"),
                "rank_absolute": item.get("rank_absolute"),
                "rank_group": item.get("rank_group"),
                "page": item.get("page"),
                "position": item.get("position"),
                "title": item.get("title"),
                "url": item.get("url"),
                "description": item.get("description")
            })
    return pd.DataFrame(records)


def parse_keyword_search_volume(task_result, endpoint):
    """Flatten a keyword search-volume task into one row per keyword-month.

    Args:
        task_result: Task dict. Each "result" entry is read for "keyword",
            "search_volume", "competition", "cpc" and "monthly_searches".
        endpoint: Endpoint label copied into every output row.

    Returns:
        pandas.DataFrame with columns endpoint, keyword, search_volume,
        competition, cpc, year, month and monthly_search_volume. A result
        without monthly data contributes no rows.
    """
    records = []
    # `or []` guards against keys that exist but hold null (None); the
    # `.get(key, [])` default only applies when the key is absent.
    for result in task_result.get("result") or []:
        # Keyword-level fields are repeated on each of the keyword's monthly rows.
        shared = {
            "endpoint": endpoint,
            "keyword": result.get("keyword"),
            "search_volume": result.get("search_volume"),
            "competition": result.get("competition"),
            "cpc": result.get("cpc"),
        }
        for month_data in result.get("monthly_searches") or []:
            records.append({
                **shared,
                "year": month_data.get("year"),
                "month": month_data.get("month"),
                "monthly_search_volume": month_data.get("search_volume")
            })
    return pd.DataFrame(records)


# --- Driver function ---
def parse_api_responses(file_path):
    """Load a JSON file of API responses and parse every task into a DataFrame.

    The file is expected to contain a list of response dicts, each with an
    "endpoint" label and a "data" dict holding a "tasks" list.

    Args:
        file_path: Path of the UTF-8 encoded JSON file.

    Returns:
        List of pandas.DataFrame objects, one per task of a recognised
        endpoint, in the order the tasks appear in the file. Tasks of unknown
        endpoints are skipped with a printed warning.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        responses = json.load(f)

    df_list = []

    for resp in responses:
        endpoint = resp.get("endpoint")
        # `or {}` / `or []` also cover "data" or "tasks" being present but
        # null; the plain `.get(key, default)` form would return None there.
        data = resp.get("data") or {}
        tasks = data.get("tasks") or []

        for task in tasks:
            if endpoint == "ai_keyword_search_volume":
                df_list.append(parse_ai_keyword_volume(task, endpoint))
            elif endpoint == "serp_google_organic_live_advanced":
                df_list.append(parse_serp(task, endpoint))
            elif endpoint == "keyword_search_volume":
                df_list.append(parse_keyword_search_volume(task, endpoint))
            else:
                print(f"Warning: Unknown endpoint {endpoint}, skipping")

    return df_list


# --- Usage ---
# parse_api_responses returns one DataFrame per task, in file order.
df_list = parse_api_responses("api_responses.json")
# NOTE(review): the positional picks below assume the file holds the AI keyword
# volume task first, the SERP task second and the keyword search-volume task
# third — confirm against the file, since a different order or extra tasks
# would silently bind the wrong frame.
ai_keyword_df = df_list[0]
serp_df = df_list[1]
volume_df = df_list[2]

In [46]:
# Preview the first rows of the AI keyword-volume frame.
ai_keyword_df.head()

Unnamed: 0,endpoint,keyword,ai_search_volume,year,month,monthly_ai_volume
0,ai_keyword_search_volume,faceless video ai,100,2025,9,100
1,ai_keyword_search_volume,faceless video ai,100,2025,8,94
2,ai_keyword_search_volume,faceless video ai,100,2025,7,83
3,ai_keyword_search_volume,faceless video ai,100,2025,6,61
4,ai_keyword_search_volume,faceless video ai,100,2025,5,44


In [47]:
# Preview the keyword search-volume frame. The parsing cell binds it as
# `volume_df`; `keyword_volume_df` is never defined in this notebook, so the
# old name only worked from leftover kernel state.
volume_df.head()

Unnamed: 0,endpoint,keyword,search_volume,competition,cpc,year,month,monthly_search_volume
0,keyword_search_volume,faceless video ai,480,HIGH,1.64,2025,8,720
1,keyword_search_volume,faceless video ai,480,HIGH,1.64,2025,7,480
2,keyword_search_volume,faceless video ai,480,HIGH,1.64,2025,6,480
3,keyword_search_volume,faceless video ai,480,HIGH,1.64,2025,5,480
4,keyword_search_volume,faceless video ai,480,HIGH,1.64,2025,4,320


In [102]:
import json
import pandas as pd

def parse_ai_keyword_volume(task_result, endpoint):
    """Collect the keywords and AI search volumes of an AI keyword-volume task.

    Args:
        task_result: Task dict. Its "result" entries hold "items", each of
            which must provide "keyword" and "ai_search_volume".
        endpoint: Accepted for signature parity with the other parsers; unused.

    Returns:
        Tuple (keyword, volume) of two parallel lists, one entry per item.
        Both lists are empty when the task has no items.

    Raises:
        KeyError: If an item lacks "keyword" or "ai_search_volume".
    """
    keyword, volume = [], []
    # `or []` covers keys that are present but null (None), which the
    # `.get(key, [])` default does not replace.
    for result in task_result.get("result") or []:
        for item in result.get("items") or []:
            keyword.append(item["keyword"])
            volume.append(item["ai_search_volume"])
    return keyword, volume

def parse_serp_organic(task_result, endpoint=None):
    """
    Parse a SERP task result, keeping only organic results.

    Items of type "organic" become rows. Items of type "people_also_ask" are
    scanned for the domains of their expanded elements, and each organic row
    is flagged when its domain appears among them.

    Args:
        task_result: Task dict whose "result" entries hold a list of "items".
        endpoint: Accepted for signature parity with the other parsers; unused.

    Returns:
        DataFrame with columns rank_absolute, page, domain, title, description,
        is_featured_snippet and in_people_also_ask. When there are no organic
        items the frame has these columns and zero rows.
    """
    columns = [
        "rank_absolute",
        "page",
        "domain",
        "title",
        "description",
        "is_featured_snippet",
    ]
    records = []
    domains_in_people_also_ask = set()
    # `or []` covers keys that are present but null (None) at every level.
    for result in task_result.get("result") or []:
        for item in result.get("items") or []:
            item_type = item.get("type")
            if item_type == "organic":
                records.append({
                    "rank_absolute": item.get("rank_absolute"),
                    "page": item.get("page"),
                    "domain": item.get("domain"),
                    "title": item.get("title", ""),
                    "description": item.get("description", ""),
                    "is_featured_snippet": item.get("is_featured_snippet"),
                })
            elif item_type == "people_also_ask":
                # Distinct loop names: the nested entries must not rebind `item`.
                for paa_item in item.get("items") or []:
                    for expanded_item in paa_item.get("expanded_element") or []:
                        domain = expanded_item.get("domain")
                        # Skip missing domains so organic rows without a domain
                        # are not flagged by accident.
                        if domain is not None:
                            domains_in_people_also_ask.add(domain)
    # Passing `columns` keeps the "domain" column present even with no rows,
    # so the lookup below cannot raise KeyError on an empty result.
    df = pd.DataFrame(records, columns=columns)
    df["in_people_also_ask"] = df["domain"].isin(list(domains_in_people_also_ask))
    return df

def parse_api_responses(file_path):
    """Load a JSON file of API responses and extract the keyword, volume and SERP frame.

    The file is expected to contain a list of response dicts, each with an
    "endpoint" label and a "data" dict holding a "tasks" list. Only the
    "ai_keyword_search_volume" and "serp_google_organic_live_advanced"
    endpoints are parsed; every other endpoint is ignored. When an endpoint
    has several tasks, the values from the last task win.

    Args:
        file_path: Path of the UTF-8 encoded JSON file.

    Returns:
        Dict with keys:
            "keyword": first keyword of the AI keyword-volume task, or None
                when no keyword was parsed.
            "volume": matching AI search volume, or None when none was parsed.
            "serp_df": DataFrame from parse_serp_organic, or None when the
                file has no SERP task.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        responses = json.load(f)

    keyword, volume = [], []
    serp_df = None
    for resp in responses:
        endpoint = resp.get("endpoint")
        # `or {}` / `or []` also cover "data" or "tasks" being present but null.
        data = resp.get("data") or {}
        tasks = data.get("tasks") or []
        for task in tasks:
            if endpoint == "ai_keyword_search_volume":
                keyword, volume = parse_ai_keyword_volume(task, endpoint)
            elif endpoint == "serp_google_organic_live_advanced":
                serp_df = parse_serp_organic(task, endpoint)

    # Guard the [0] lookups: the lists stay empty when the file has no
    # AI keyword-volume items, which used to raise IndexError here.
    return {
        "keyword": keyword[0] if keyword else None,
        "volume": volume[0] if volume else None,
        "serp_df": serp_df
    }

In [103]:
# Parse one saved API response file into a dict with "keyword", "volume" and "serp_df".
api_res = parse_api_responses("api_responses_faceless_video_ai_videoinu_com_20251005_144925.json")
# Display the organic SERP frame, including the People-Also-Ask flag.
api_res["serp_df"]

Unnamed: 0,rank_absolute,page,domain,title,description,is_featured_snippet,in_people_also_ask
0,1,1,faceless.video,Faceless.video,Our AI automated content creation tool convert...,False,False
1,2,1,invideo.io,Free AI Faceless Video Generator,Create faceless video with simple prompts. Our...,False,False
2,4,1,www.canva.com,AI Faceless Video - Canva Apps,"Generate engaging, up-to-10-minute HD faceless...",False,False
3,5,1,autoshorts.ai,AutoShorts.ai | #1 Faceless Video Generator fo...,"AutoShorts.ai automatically creates, schedules...",False,False
4,8,1,sendshort.ai,"Faceless Video AI Generator | Free, No Watermark",SendShort is the #1 AI tool for creating facel...,False,True
5,9,1,www.short.ai,Free AI Faceless Video Generator - Create Vira...,Short AI creates engaging faceless videos in m...,False,False
6,10,1,www.bigmotion.ai,BigMotion AI: Free Faceless AI Video Generator,A faceless AI video generator creates videos w...,False,False
7,11,1,www.facelessvideos.ai,"FacelessVideos.AI | Create viral, faceless vid...",Creating videos with FacelessVideos is very ea...,False,False
8,13,2,www.opus.pro,Faceless AI Video Generator - Create Videos In...,Generate faceless AI videos effortlessly for s...,False,False
9,14,2,www.veed.io,Faceless Video AI Generator - Create Online,Create awesome videos without showing your fac...,False,True


In [124]:
from sklearn.preprocessing import MinMaxScaler
def map_serp_to_logit_components(
    serp_df: pd.DataFrame,
    keyword: str,
    lambdas: List[float] = [1.0, 1.0, 1.0, 1.0, 1.0],
    alpha: float = 0.15,
    similarity_func: Callable = get_similarities
) -> pd.DataFrame:
    """
    Map SERP dataframe rows to logit components for ChatGPT volume estimation.

    Args:
        serp_df: DataFrame with columns ['rank_absolute', 'page', 'domain', 'title', 'description',
                'is_featured_snippet', 'in_people_also_ask', ...]. It is not modified.
        keyword: The search keyword (e.g., "faceless video ai")
        lambdas: Weight vector [λ1, λ2, λ3, λ4, λ5] for [vis, sem, auth, feat, est_clicks],
                passed unchanged to compute_logit.
        alpha: Decay parameter forwarded to default_visibility
        similarity_func: Called as similarity_func(keyword, list_of_texts); the first
                element of its return value is used as the per-row semantic scores.

    Returns:
        Copy of serp_df with the added columns 'vis', 'text_for_semantics', 'sem', 'auth',
        'feat', 'est_clicks', their min-max scaled counterparts ('vis_scaled', 'sem_scaled',
        'auth_scaled', 'feat_scaled', 'est_clicks_scaled'), 'logit' and 'domain_share'.
    """
    # NOTE(review): the default `lambdas` list is shared between calls. This function
    # only forwards it; compute_logit is assumed not to mutate it — TODO confirm.

    # Create a copy to avoid modifying original
    result_df = serp_df.copy()

    # 1. Visibility (Vis) - based on rank_absolute
    result_df['vis'] = result_df['rank_absolute'].apply(
        lambda rank: default_visibility(rank, alpha)
    )

    # 2. Compute Semantic Score
    # Missing titles/descriptions are treated as empty text; concatenating a missing
    # value would otherwise turn the whole text into NaN before it reaches
    # similarity_func.
    title_text = result_df['title'].fillna("").astype(str)
    description_text = result_df['description'].fillna("").astype(str)
    result_df["text_for_semantics"] = (title_text + " " + description_text).str.strip()
    result_df["sem"] = similarity_func(keyword, result_df['text_for_semantics'].tolist())[0]

    # 3. Authority (Auth) - placeholder constant.
    # TODO: replace with a rank-based authority score (e.g. a default_auth(rank, max_rank) helper).
    result_df['auth'] = 0

    # 4. SERP Features (Feat) - based on featured snippets, people also ask, etc.
    result_df['feat'] = result_df.apply(default_feature_score, axis=1)

    # 5. Estimated clicks (Est_clicks) - placeholder constant until UTM-based data is available.
    result_df['est_clicks'] = 0

    # 6. Scale features to [0, 1] range to prevent bias using sklearn MinMaxScaler
    scaler = MinMaxScaler()
    feature_columns = ['vis', 'sem', 'auth', 'feat', 'est_clicks']

    # Scale features
    scaled_features = scaler.fit_transform(result_df[feature_columns])
    for i, col in enumerate(feature_columns):
        result_df[f'{col}_scaled'] = scaled_features[:, i]

    # 7. Compute final logit score with scaled features
    result_df['logit'] = result_df.apply(
        lambda row: compute_logit(
            row['vis_scaled'],
            row['sem_scaled'],
            row['auth_scaled'],
            row['feat_scaled'],
            row['est_clicks_scaled'],
            lambdas
        ), axis=1
    )

    # 8. Compute Domain Share using softmax
    logits = result_df['logit'].values
    exps = np.exp(logits - np.max(logits))  # Subtract max for numerical stability
    softmax_probs = exps / (exps.sum() + 1e-12)  # Add small epsilon to prevent division by zero
    result_df['domain_share'] = softmax_probs

    return result_df



In [None]:
# Score the parsed organic SERP rows for the keyword, using the default weights and alpha.
map_serp_to_logit_components(api_res["serp_df"], "faceless video ai")

Unnamed: 0,rank_absolute,page,domain,title,description,is_featured_snippet,in_people_also_ask,vis,text_for_semantics,sem,auth,feat,est_clicks,vis_scaled,sem_scaled,auth_scaled,feat_scaled,est_clicks_scaled,logit,domain_share
0,1,1,faceless.video,Faceless.video,Our AI automated content creation tool convert...,False,False,1.0,Faceless.video Our AI automated content creati...,0.7318,0,0.0,0,1.0,0.828551,0.0,0.0,0.0,1.828551,0.083155
1,2,1,invideo.io,Free AI Faceless Video Generator,Create faceless video with simple prompts. Our...,False,False,0.860708,Free AI Faceless Video Generator Create facele...,0.85133,0,0.0,0,0.859363,1.0,0.0,0.0,0.0,1.859363,0.085757
2,4,1,www.canva.com,AI Faceless Video - Canva Apps,"Generate engaging, up-to-10-minute HD faceless...",False,False,0.637628,AI Faceless Video - Canva Apps Generate engagi...,0.730502,0,0.0,0,0.63413,0.826689,0.0,0.0,0.0,1.460819,0.057569
3,5,1,autoshorts.ai,AutoShorts.ai | #1 Faceless Video Generator fo...,"AutoShorts.ai automatically creates, schedules...",False,False,0.548812,AutoShorts.ai | #1 Faceless Video Generator fo...,0.596015,0,0.0,0,0.544456,0.633787,0.0,0.0,0.0,1.178242,0.043397
4,8,1,sendshort.ai,"Faceless Video AI Generator | Free, No Watermark",SendShort is the #1 AI tool for creating facel...,False,True,0.349938,"Faceless Video AI Generator | Free, No Waterma...",0.800336,0,0.0,0,0.343662,0.926856,0.0,0.0,0.0,1.270518,0.047593
5,9,1,www.short.ai,Free AI Faceless Video Generator - Create Vira...,Short AI creates engaging faceless videos in m...,False,False,0.301194,Free AI Faceless Video Generator - Create Vira...,0.774314,0,0.0,0,0.294448,0.889531,0.0,0.0,0.0,1.183979,0.043647
6,10,1,www.bigmotion.ai,BigMotion AI: Free Faceless AI Video Generator,A faceless AI video generator creates videos w...,False,False,0.25924,BigMotion AI: Free Faceless AI Video Generator...,0.742915,0,0.0,0,0.252089,0.844493,0.0,0.0,0.0,1.096582,0.039994
7,11,1,www.facelessvideos.ai,"FacelessVideos.AI | Create viral, faceless vid...",Creating videos with FacelessVideos is very ea...,False,False,0.22313,"FacelessVideos.AI | Create viral, faceless vid...",0.820329,0,0.0,0,0.21563,0.955534,0.0,0.0,0.0,1.171164,0.043091
8,13,2,www.opus.pro,Faceless AI Video Generator - Create Videos In...,Generate faceless AI videos effortlessly for s...,False,False,0.165299,Faceless AI Video Generator - Create Videos In...,0.836485,0,0.0,0,0.157241,0.978707,0.0,0.0,0.0,1.135948,0.0416
9,14,2,www.veed.io,Faceless Video AI Generator - Create Online,Create awesome videos without showing your fac...,False,True,0.142274,Faceless Video AI Generator - Create Online Cr...,0.78568,0,0.0,0,0.133994,0.905834,0.0,0.0,0.0,1.039827,0.037788


: 