# Libraries

In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

# Use Times New Roman for all text drawn by matplotlib in this notebook.
plt.rc('font', family='Times New Roman')

from typing import List
from tqdm import tqdm

# Path

In [None]:
# Input folder. Keep the trailing slash: file names are appended to this
# string with plain `+` concatenation.
data_path = "/path/to/your/data/folder/Data for inputs/"
# Output folder for exported figures (same trailing-slash convention).
figure_path = "/path/to/your/data/folder/Folder for outputs/"

# Data

In [None]:
# Load the two annotation tables (NSF first, then NSFC); the first CSV column
# becomes the row index of each frame.
nsf, nsfc = (
    pd.read_csv(data_path + csv_name, index_col=0)
    for csv_name in ('ProposalAnnotation_NSF.csv', 'ProposalAnnotation_NSFC.csv')
)

# Visualization

### ***Polar***

In [None]:
# Columns plotted on the polar chart: every NSF column except the first two
# and the last one.
col_list = nsf.columns[2:-1].to_list()

# For each plotted column: the table's row count and the column total.
nsf_all = [len(nsf) for _ in col_list]
nsf_positive = [nsf[name].sum() for name in col_list]

# Same statistics for the NSFC table, using the NSF-derived column list.
nsfc_all = [len(nsfc) for _ in col_list]
nsfc_positive = [nsfc[name].sum() for name in col_list]

In [None]:
def _wrap_label(label):
    """Return ``label`` broken into two lines at a space near its midpoint.

    The first space at or after the midpoint is preferred; if there is none,
    the last space before the midpoint is used. The chosen space is replaced
    by a newline. A label that contains no space is returned unchanged,
    because slicing around the midpoint would silently drop one character.
    """
    mid_index = len(label) // 2
    head, tail = label[:mid_index], label[mid_index:]

    if ' ' in tail:
        split_index = mid_index + tail.index(' ')
    elif ' ' in head:
        split_index = head.rindex(' ')
    else:
        return label

    return label[:split_index] + '\n' + label[split_index + 1:]


# Sample sizes come from the loaded tables so that the legend and the
# reference circles cannot drift away from the data.
n_nsf = len(nsf)
n_nsfc = len(nsfc)

# One spoke per plotted column; the first angle is repeated at the end so the
# list describes a closed circle.
angles = np.linspace(0, 2 * np.pi, len(col_list), endpoint=False).tolist()
angles += angles[:1]

fig, ax2 = plt.subplots(1, 1, figsize=(10, 10), subplot_kw={'polar': True})

# Shift the two bar series to either side of each spoke so they sit side by side.
offset = 0.05
angles_nsf = [angle - offset for angle in angles[:-1]]
angles_nsfc = [angle + offset for angle in angles[:-1]]

bars_nsf = ax2.bar(angles_nsf, nsf_positive, width=0.125, color='#1f77b4', alpha=0.45, edgecolor='none', label=f'NSF, n={n_nsf}')
bars_nsfc = ax2.bar(angles_nsfc, nsfc_positive, width=0.125, color='#ff7f0e', alpha=0.45, edgecolor='none', label=f'NSFC, n={n_nsfc}')

# Dashed reference circles drawn at the two sample sizes.
full_circle = np.linspace(0, 2 * np.pi, 100)
ax2.plot(full_circle, [n_nsf] * 100, color='#1f77b4', linestyle=(0, (20, 10)), linewidth=0.25)
ax2.plot(full_circle, [n_nsfc] * 100, color='#ff7f0e', linestyle=(0, (20, 10)), linewidth=0.25)

# Column names are written along their spoke, wrapped onto two lines.
labels = col_list
for label, angle in zip(labels, angles[:-1]):
    label_two_lines = _wrap_label(label)

    # Flip text on the left half of the circle so it is not upside down.
    angle_deg = np.degrees(angle)
    rotation = angle_deg + 180 if 90 <= angle_deg <= 270 else angle_deg

    # The radius 165 and the radial limit 210 below are fixed values chosen by
    # the author; they do not rescale with the sample sizes.
    ax2.text(angle, 165, label_two_lines, fontsize=12, horizontalalignment='center', verticalalignment='center', rotation=rotation)

# Hide the default angular/radial ticks; spokes and labels are drawn manually.
ax2.set_xticks([])
ax2.set_yticks([])

ax2.grid(True, linewidth=0.15)
ax2.spines['polar'].set_linewidth(0.25)

ax2.set_ylim(0, 210)

ax2.tick_params(axis='x', pad=0.5)
ax2.tick_params(axis='y', labelsize=8)

ax2.set_rlabel_position(225)

# One dashed spoke per column (the duplicated closing angle is skipped so the
# first spoke is not drawn twice).
for angle in angles[:-1]:
    ax2.plot([angle, angle], [0, 210], color='gray', linewidth=0.15, linestyle='--')

ax2.legend(frameon=False,
           loc='lower left',
           fontsize=12)

# NOTE(review): the quadrant figure later in this notebook is saved under the
# same placeholder name and will overwrite this file unless one is renamed.
plt.savefig(figure_path + 'Figure_name.pdf', format='pdf', dpi=600, bbox_inches='tight')

plt.show()

### ***Quadrant***

In [None]:
import importlib

import my_utils
# Reload the local helper module so that edits made to it since the first
# import are picked up without restarting the kernel.
importlib.reload(my_utils)

# Helpers and column lists used by the quadrant cells below.
from my_utils import fit_ellipse_to_polygon, ellipse_points, y_col_list, x_col_list

In [None]:
# The four derived columns, addressed by name rather than by their position
# at the end of the frame.
QUADRANT_COLUMNS = ['Quadrant_value_1', 'Quadrant_value_2', 'Quadrant_value_3', 'Quadrant_value_4']


def _cumulative_quadrant_sums(df, years, scale=1):
    """Return one list of four quadrant totals per cut-off year.

    For every ``year`` in ``years`` the rows with ``df['year'] <= year`` are
    selected (so the totals are cumulative) and each quadrant column is summed
    and multiplied by ``scale``.
    """
    totals = []
    for year in years:
        batch = df[df['year'] <= year]
        totals.append([batch[column].sum() * scale for column in QUADRANT_COLUMNS])
    return totals


def _quadrant_polygon(values):
    """Turn four quadrant totals into a closed polygon on the two diagonals.

    Each total is placed on the diagonal of its quadrant (+,+), (-,+), (-,-),
    (+,-); the first vertex is repeated at the end to close the outline.
    """
    q1, q2, q3, q4 = values
    return [(q1, q1), (-q2, q2), (-q3, -q3), (q4, -q4), (q1, q1)]


# Stack both tables with a fresh 0..n-1 index so the column assignments below
# never have to align on duplicated index labels.
nsf_nsfc = pd.concat([nsf, nsfc], axis=0, ignore_index=True)

# Row-wise totals over the column groups supplied by my_utils.
nsf_nsfc['Y'] = nsf_nsfc[y_col_list].sum(axis=1)
nsf_nsfc['X'] = nsf_nsfc[x_col_list].sum(axis=1)

nsf_nsfc['Quadrant_value_1'] = nsf_nsfc['X'] + nsf_nsfc['Y']
nsf_nsfc['Quadrant_value_2'] = nsf_nsfc['Y'] * 2
nsf_nsfc['Quadrant_value_3'] = 0
nsf_nsfc['Quadrant_value_4'] = nsf_nsfc['X'] * 2

df_nsf = nsf_nsfc[nsf_nsfc['source'] == 'NSF'].reset_index(drop=True)
df_nsfc = nsf_nsfc[nsf_nsfc['source'] == 'NSFC'].reset_index(drop=True)

# NSF: cumulative totals for cut-off years 2018..2023.
# (The "avg_" names are kept for compatibility; the values are sums.)
avg_quadrant_nsf = _cumulative_quadrant_sums(df_nsf, range(2018, 2024))
coord_quadrant_nsf = [_quadrant_polygon(values) for values in avg_quadrant_nsf]

# Scale NSFC totals to the NSF sample size so the two programmes are
# comparable. The ratio is taken from the row counts instead of a hard-coded
# 179/196; the fallback only avoids a division by zero on an empty NSFC table.
_ratio = len(df_nsf) / len(df_nsfc) if len(df_nsfc) else 1.0

# NSFC: cumulative totals for cut-off years 2015..2020, rescaled by _ratio.
avg_quadrant_nsfc = _cumulative_quadrant_sums(df_nsfc, range(2015, 2021), scale=_ratio)
coord_quadrant_nsfc = [_quadrant_polygon(values) for values in avg_quadrant_nsfc]

In [None]:
from matplotlib.patches import Ellipse, PathPatch
from matplotlib.path import Path


def _add_cumulative_ellipses(ax, polygons, color, label):
    """Outline the ellipse fitted to every polygon in ``polygons``.

    All ellipses share the same dashed style; only the first one carries
    ``label`` so the legend shows a single entry per series.
    """
    for idx, polygon in enumerate(polygons):
        center, width, height, angle = fit_ellipse_to_polygon(polygon)
        ellipse = Ellipse(xy=center, width=width, height=height, angle=angle,
                          edgecolor=color, fc='None', linestyle=(0, (6, 3)), linewidth=0.5)
        if idx == 0:
            ellipse.set_label(label)
        ax.add_patch(ellipse)


def _invisible_band(outer_xy, inner_xy, outer_path, inner_path):
    """Build the quads between two outlines where ``outer`` lies outside ``inner``.

    The patches have neither face nor edge colour, so they render nothing;
    they are kept so the ``patches_*`` lists stay available.
    """
    band = []
    for i in range(len(outer_xy) - 1):
        point = outer_xy[i]
        if outer_path.contains_point(point) and not inner_path.contains_point(point):
            quad = [outer_xy[i], inner_xy[i], inner_xy[i + 1], outer_xy[i + 1]]
            band.append(PathPatch(Path(quad), edgecolor='none', facecolor='none', alpha=0.1))
    return band


def _add_gradient_band(ax, xy_from, xy_to, outer_xy, outer_path, inner_path, color, steps):
    """Shade the region inside ``outer_path`` but outside ``inner_path``.

    ``steps`` intermediate outlines are interpolated between ``xy_from`` and
    ``xy_to``. Wherever an interpolated point is inside ``outer_path`` and not
    inside ``inner_path``, a translucent quad is drawn from that outline to
    ``outer_xy``; the overlapping quads build up the colour gradient.
    """
    for step in range(1, steps + 1):
        t = step / (steps + 1)
        blended = (1 - t) * xy_from + t * xy_to
        alpha_value = 0.05 * t

        for j in range(len(blended) - 1):
            point = blended[j]
            if outer_path.contains_point(point) and not inner_path.contains_point(point):
                quad = [blended[j], blended[j + 1], outer_xy[j + 1], outer_xy[j]]
                ax.add_patch(PathPatch(Path(quad), edgecolor='none', facecolor=color, alpha=alpha_value))


plt.figure(figsize=(10, 10))
ax = plt.gca()

# Outlines fitted to the last polygon of each series; these two drive the shading.
center_nsf, width_nsf, height_nsf, angle_nsf = fit_ellipse_to_polygon(coord_quadrant_nsf[-1])
x_nsf, y_nsf = ellipse_points(center_nsf, width_nsf, height_nsf, angle_nsf)

center_nsfc, width_nsfc, height_nsfc, angle_nsfc = fit_ellipse_to_polygon(coord_quadrant_nsfc[-1])
x_nsfc, y_nsfc = ellipse_points(center_nsfc, width_nsfc, height_nsfc, angle_nsfc)

# (n, 2) vertex arrays; both outlines are assumed to have the same number of
# points because they are interpolated element-wise below.
xy_nsf = np.column_stack((x_nsf, y_nsf))
xy_nsfc = np.column_stack((x_nsfc, y_nsfc))

path_nsf = Path(xy_nsf)
path_nsfc = Path(xy_nsfc)

# Dashed ellipse for every cumulative polygon, NSF first and then NSFC.
# NOTE(review): the data cell iterates NSF cut-off years 2018-2023 while the
# legend label says 2017-2023; confirm the label is intended.
_add_cumulative_ellipses(ax, coord_quadrant_nsf, '#1f77b4', 'NSF, 2017-2023')
_add_cumulative_ellipses(ax, coord_quadrant_nsfc, '#ff7f0e', 'NSFC, 2015-2020')

num_lines = 20

# Region where the NSFC outline extends beyond the NSF outline (orange).
patches_nsfc = _invisible_band(xy_nsfc, xy_nsf, path_nsfc, path_nsf)
for patch in patches_nsfc:
    ax.add_patch(patch)

_add_gradient_band(ax, xy_nsf, xy_nsfc, xy_nsfc, path_nsfc, path_nsf, '#ff7f0e', num_lines)

# Region where the NSF outline extends beyond the NSFC outline (blue).
patches_nsf = _invisible_band(xy_nsf, xy_nsfc, path_nsf, path_nsfc)
for patch in patches_nsf:
    ax.add_patch(patch)

_add_gradient_band(ax, xy_nsf, xy_nsfc, xy_nsf, path_nsf, path_nsfc, '#1f77b4', num_lines)

# Diagonals and axes that delimit the quadrants.
ax.plot([-2000, 2000], [-2000, 2000], c='k', linewidth=0.1, linestyle='--', dashes=[30, 20])
ax.plot([-2000, 2000], [2000, -2000], c='k', linewidth=0.1, linestyle='--', dashes=[30, 20])

ax.axvline(x=0, c='k', linewidth=0.2)
ax.axhline(y=0, c='k', linewidth=0.2)

# Quadrant captions; the lower-left quadrant is left without a caption.
ax.text(1700, 1700, 'Use-inspired Basic Research\n(Pasteur)', fontsize=12, color='k', ha='center', va='center')
ax.text(-1700, 1700, 'Pure Basic Research\n(Bohr)', fontsize=12, color='k', ha='center', va='center')
ax.text(1700, -1700, 'Pure Applied Research\n(Edison)', fontsize=12, color='k', ha='center', va='center')

ax.set_xlim(-2200, 2200)
ax.set_ylim(-2200, 2200)

ax.legend(frameon=False, fontsize=12, loc='lower left')

ax.axis('off')

# NOTE(review): the polar figure earlier in this notebook is saved under the
# same placeholder name; rename one of them or this call overwrites that file.
plt.savefig(figure_path + 'Figure_name.pdf', format='pdf', dpi=600, bbox_inches='tight')

plt.show()

# Feature

### ***Settings***

In [None]:
import os

from openai import OpenAI

import importlib
import my_utils
# Reload the local helper module so recent edits to it are picked up.
importlib.reload(my_utils)
from my_utils import feature_extraction

# Inputs for the extraction run, loaded from the data folder.
# NOTE(review): if these .npy files store Python objects, np.load needs
# allow_pickle=True; confirm against how the files were written.
_abstract_list = np.load(data_path + 'RPAbstracts.npy')
_feature_list = np.load(data_path + 'RPFeatures.npy')

# Read the API key from the environment so the secret is never saved in the
# notebook; falls back to the empty placeholder when the variable is unset.
_key = os.environ.get('OPENAI_API_KEY', '')
_system_prompt = '' # define context, role...
_requirements = '' # clarify requirements...
_model = '' # choose model type
_temperature = '' # set model temperature (placeholder; presumably a number is expected, confirm in my_utils)

### ***Run***

In [None]:
# Run the extraction with the values configured in the settings cell.
# Credentials and model options come first, then prompts, then the data.
df_output = feature_extraction(
    api_key=_key,
    model=_model,
    temperature=_temperature,
    system_prompt=_system_prompt,
    requirements=_requirements,
    abstract_list=_abstract_list,
    feature_list=_feature_list,
)