In [1]:
from pyspark.sql import SparkSession,Row
import pyspark.sql.functions as F
import pyspark.sql.types as T

In [2]:
spark=(
    SparkSession.builder.master("yarn").appName("Pyspark with XGBoot")
    .config("spark.driver.cores","4")
    .config("spark.driver.memory","4g")
    .config("spark.executor.memory","4g")
    .config("spark.executor.cores","4")
    .getOrCreate()
)

24/01/23 18:03:34 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
24/01/23 18:03:36 WARN Client: Neither spark.yarn.jars nor spark.yarn.archive is set, falling back to uploading libraries under SPARK_HOME.


In [3]:
# import data
data=spark.read.csv(
    "/pyspark_xgboot/data.csv",inferSchema=True,header=True,encoding="gbk"
)
print(data.count(),len(data.columns))

                                                                                

16000 82


In [4]:
# 观察特征的类型
data.printSchema()

root
 |-- 个人编码: double (nullable = true)
 |-- 一天去两家医院的天数: integer (nullable = true)
 |-- 就诊的月数: integer (nullable = true)
 |-- 月就诊天数_MAX: integer (nullable = true)
 |-- 月就诊天数_AVG: double (nullable = true)
 |-- 月就诊医院数_MAX: integer (nullable = true)
 |-- 月就诊医院数_AVG: double (nullable = true)
 |-- 就诊次数_SUM: integer (nullable = true)
 |-- 月就诊次数_MAX: integer (nullable = true)
 |-- 月就诊次数_AVG: double (nullable = true)
 |-- 月统筹金额_MAX: double (nullable = true)
 |-- 月统筹金额_AVG: double (nullable = true)
 |-- 月药品金额_MAX: double (nullable = true)
 |-- 月药品金额_AVG: double (nullable = true)
 |-- 医院_就诊天数_MAX: integer (nullable = true)
 |-- 医院_就诊天数_AVG: double (nullable = true)
 |-- 医院_统筹金_MAX: double (nullable = true)
 |-- 医院_统筹金_AVG: double (nullable = true)
 |-- 医院_药品_MAX: double (nullable = true)
 |-- 医院_药品_AVG: double (nullable = true)
 |-- 医院编码_NN: integer (nullable = true)
 |-- 顺序号_NN: integer (nullable = true)
 |-- 交易时间DD_NN: integer (nullable = true)
 |-- 交易时间YYYY_NN: integer (nullable = true)
 |--

In [5]:
import pandas as pd
pd.set_option("display.max_rows",1000)

In [6]:
# 将特征中的二元特征提取出来
is_binary = data.agg(
    *[
        (F.size(F.collect_set(x)) == 2).alias(x)
        for x in data.columns
    ]
).toPandas()
is_binary.unstack()

24/01/23 18:04:18 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
                                                                                

个人编码               0    False
一天去两家医院的天数         0    False
就诊的月数              0    False
月就诊天数_MAX          0    False
月就诊天数_AVG          0    False
月就诊医院数_MAX         0    False
月就诊医院数_AVG         0    False
就诊次数_SUM           0    False
月就诊次数_MAX          0    False
月就诊次数_AVG          0    False
月统筹金额_MAX          0    False
月统筹金额_AVG          0    False
月药品金额_MAX          0    False
月药品金额_AVG          0    False
医院_就诊天数_MAX        0    False
医院_就诊天数_AVG        0    False
医院_统筹金_MAX         0    False
医院_统筹金_AVG         0    False
医院_药品_MAX          0    False
医院_药品_AVG          0    False
医院编码_NN            0    False
顺序号_NN             0    False
交易时间DD_NN          0    False
交易时间YYYY_NN        0    False
交易时间YYYYMM_NN      0    False
住院天数_SUM           0    False
个人账户金额_SUM         0    False
统筹支付金额_SUM         0    False
ALL_SUM            0    False
可用账户报销金额_SUM       0    False
药品费发生金额_SUM        0    False
药品费自费金额_SUM        0    False
药品费申报金额_SUM        0    False
贵重药品发生金额_S

In [7]:
# 创建4个顶级变量
identifiers="个人编码"
target_column= "RES"
binary_columns=[
    "BZ_民政救助",
    "BZ_城乡优抚",
    "是否挂号"
]
continuous_columns=[
    x
    for x in data.columns
    if x not in binary_columns
    and x not in target_column
    and x not in identifiers
]

In [8]:
# 缺失值处理
# 删除只包含 null 的数据
# 并使用 0.0 填充缺失值
data=data.dropna(
    how="all",
    subset=[x for x in data.columns if x not in identifiers]
)
data=data.dropna(subset=target_column)
data=data.fillna(0.0,subset=data.columns)
print(data.count(),len(data.columns))

16000 82


In [9]:
# 清洗没有用的特征列 只需要清洗continuous_columns
continuous_columns=data.select(
    *[
        x for x in continuous_columns
        if (data.select(F.countDistinct(F.col(x))).collect()[0][0]!=1)
    ]
).columns
print(len(continuous_columns))
print(continuous_columns)

                                                                                

75
['一天去两家医院的天数', '就诊的月数', '月就诊天数_MAX', '月就诊天数_AVG', '月就诊医院数_MAX', '月就诊医院数_AVG', '就诊次数_SUM', '月就诊次数_MAX', '月就诊次数_AVG', '月统筹金额_MAX', '月统筹金额_AVG', '月药品金额_MAX', '月药品金额_AVG', '医院_就诊天数_MAX', '医院_就诊天数_AVG', '医院_统筹金_MAX', '医院_统筹金_AVG', '医院_药品_MAX', '医院_药品_AVG', '医院编码_NN', '顺序号_NN', '交易时间DD_NN', '交易时间YYYYMM_NN', '个人账户金额_SUM', '统筹支付金额_SUM', 'ALL_SUM', '可用账户报销金额_SUM', '药品费发生金额_SUM', '药品费自费金额_SUM', '药品费申报金额_SUM', '贵重药品发生金额_SUM', '中成药费发生金额_SUM', '中草药费发生金额_SUM', '检查费发生金额_SUM', '检查费自费金额_SUM', '检查费申报金额_SUM', '贵重检查费金额_SUM', '治疗费发生金额_SUM', '治疗费自费金额_SUM', '治疗费申报金额_SUM', '手术费发生金额_SUM', '手术费自费金额_SUM', '手术费申报金额_SUM', '床位费发生金额_SUM', '床位费申报金额_SUM', '医用材料发生金额_SUM', '高价材料发生金额_SUM', '医用材料费自费金额_SUM', '成分输血申报金额_SUM', '其它发生金额_SUM', '其它申报金额_SUM', '一次性医用材料申报金额_SUM', '起付线标准金额_MAX', '起付标准以上自负比例金额_SUM', '医疗救助个人按比例负担金额_SUM', '最高限额以上金额_SUM', '基本统筹基金支付金额_SUM', '公务员医疗补助基金支付金额_SUM', '城乡救助补助金额_SUM', '基本个人账户支付_SUM', '非账户支付金额_SUM', '本次审批金额_SUM', '补助审批金额_SUM', '医疗救助医院申请_SUM', '残疾军人补助_SUM', '民政救助补助_SUM', '城乡优抚补助_SUM', '出院诊断病种名称_

In [10]:
# 将所有连续特征放在一列，以便后续处理
from pyspark.ml.feature import VectorAssembler
continuous_features=VectorAssembler(
    inputCols=continuous_columns,
    outputCol="continuous_features"
)
vector_data=data.select(continuous_columns)
for x in continuous_columns:
    vector_data=vector_data.where(~F.isnull(F.col(x)))
vector_variable=continuous_features.transform(vector_data)
vector_variable.select("continuous_features").show(5,False)

[Stage 473:>                                                        (0 + 1) / 1]

+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|continuous_features                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            |
+---

                                                                                

In [11]:
vector_variable.select("continuous_features").printSchema()

root
 |-- continuous_features: vector (nullable = true)



In [12]:
from pyspark.ml.stat import Correlation
# 获取相关系数矩阵
correlation = Correlation.corr(
    vector_variable,
    "continuous_features"
)



In [13]:
correlation.printSchema()

root
 |-- pearson(continuous_features): matrix (nullable = false)



In [14]:
correlation_values = correlation.collect()[0]["pearson(continuous_features)"].values
correlation_values

array([ 1.        ,  0.06467579,  0.47231246, ...,  0.07289498,
       -0.0391515 ,  1.        ])

In [15]:
# 删除高相关特征
def delHighlyCol(corr_values,inputcolumns,threshold=0.9):
    columns=inputcolumns.copy()
    # 提取高相关特征
    highly_corr = {}
    for i in range(len(columns)):
        for j in range(i):
            if abs(corr_values[i*len(columns)+j])>threshold:
                col = columns[i]
                related_col = columns[j]
                if col not in highly_corr:
                    highly_corr[col] = set()
                highly_corr[col].add(related_col)
    # 合并重复的特征并删除高相关特征
    for col,related_cols in list(highly_corr.items()):
        for related_col in list(related_cols):
            if related_col in highly_corr:
                highly_corr[col].update(highly_corr[related_col])
                del highly_corr[related_col]
    for col,related_cols in list(highly_corr.items()):
        for related_col in list(related_cols):
            columns.remove(related_col)
    return columns

In [16]:
continuous_columnsDelHigh=delHighlyCol(corr_values=correlation_values,inputcolumns=continuous_columns,threshold=0.95)

In [17]:
print(len(continuous_columnsDelHigh))
print(continuous_columnsDelHigh)

55
['一天去两家医院的天数', '月就诊医院数_AVG', '月就诊次数_MAX', '月统筹金额_MAX', '月药品金额_MAX', '医院_就诊天数_MAX', '医院_就诊天数_AVG', '医院_统筹金_MAX', '医院_统筹金_AVG', '医院_药品_MAX', '医院_药品_AVG', '医院编码_NN', '顺序号_NN', '交易时间DD_NN', '交易时间YYYYMM_NN', '药品费自费金额_SUM', '药品费申报金额_SUM', '贵重药品发生金额_SUM', '中成药费发生金额_SUM', '中草药费发生金额_SUM', '检查费自费金额_SUM', '检查费申报金额_SUM', '贵重检查费金额_SUM', '治疗费自费金额_SUM', '治疗费申报金额_SUM', '手术费自费金额_SUM', '手术费申报金额_SUM', '床位费申报金额_SUM', '医用材料发生金额_SUM', '高价材料发生金额_SUM', '医用材料费自费金额_SUM', '成分输血申报金额_SUM', '其它发生金额_SUM', '其它申报金额_SUM', '一次性医用材料申报金额_SUM', '起付线标准金额_MAX', '起付标准以上自负比例金额_SUM', '最高限额以上金额_SUM', '基本统筹基金支付金额_SUM', '公务员医疗补助基金支付金额_SUM', '基本个人账户支付_SUM', '非账户支付金额_SUM', '本次审批金额_SUM', '医疗救助医院申请_SUM', '残疾军人补助_SUM', '民政救助补助_SUM', '城乡优抚补助_SUM', '出院诊断病种名称_NN', '出院诊断LENTH_MAX', '药品在总金额中的占比', '个人支付的药品占比', '检查总费用在总金额占比', '个人支付检查费用占比', '治疗费用在总金额占比', '个人支付治疗费用占比']


In [18]:
print(len(continuous_columns))
print(continuous_columns)

75
['一天去两家医院的天数', '就诊的月数', '月就诊天数_MAX', '月就诊天数_AVG', '月就诊医院数_MAX', '月就诊医院数_AVG', '就诊次数_SUM', '月就诊次数_MAX', '月就诊次数_AVG', '月统筹金额_MAX', '月统筹金额_AVG', '月药品金额_MAX', '月药品金额_AVG', '医院_就诊天数_MAX', '医院_就诊天数_AVG', '医院_统筹金_MAX', '医院_统筹金_AVG', '医院_药品_MAX', '医院_药品_AVG', '医院编码_NN', '顺序号_NN', '交易时间DD_NN', '交易时间YYYYMM_NN', '个人账户金额_SUM', '统筹支付金额_SUM', 'ALL_SUM', '可用账户报销金额_SUM', '药品费发生金额_SUM', '药品费自费金额_SUM', '药品费申报金额_SUM', '贵重药品发生金额_SUM', '中成药费发生金额_SUM', '中草药费发生金额_SUM', '检查费发生金额_SUM', '检查费自费金额_SUM', '检查费申报金额_SUM', '贵重检查费金额_SUM', '治疗费发生金额_SUM', '治疗费自费金额_SUM', '治疗费申报金额_SUM', '手术费发生金额_SUM', '手术费自费金额_SUM', '手术费申报金额_SUM', '床位费发生金额_SUM', '床位费申报金额_SUM', '医用材料发生金额_SUM', '高价材料发生金额_SUM', '医用材料费自费金额_SUM', '成分输血申报金额_SUM', '其它发生金额_SUM', '其它申报金额_SUM', '一次性医用材料申报金额_SUM', '起付线标准金额_MAX', '起付标准以上自负比例金额_SUM', '医疗救助个人按比例负担金额_SUM', '最高限额以上金额_SUM', '基本统筹基金支付金额_SUM', '公务员医疗补助基金支付金额_SUM', '城乡救助补助金额_SUM', '基本个人账户支付_SUM', '非账户支付金额_SUM', '本次审批金额_SUM', '补助审批金额_SUM', '医疗救助医院申请_SUM', '残疾军人补助_SUM', '民政救助补助_SUM', '城乡优抚补助_SUM', '出院诊断病种名称_

In [19]:
#   XGBoost On PySpark
import random
from pyspark.ml import Pipeline
from xgboost.spark import SparkXGBClassifier,SparkXGBClassifierModel

In [20]:
# 构建在xgboost中使用的data
feature_cols=continuous_columnsDelHigh+binary_columns
label_col=target_column
pred_col_name="pred"
all_cols=[identifiers]+feature_cols+[label_col]
print(all_cols)
df=data.select(
    *all_cols
).withColumnRenamed(
    "个人编码","id"
).withColumn("id",F.monotonically_increasing_id()+1) #更新编号
df.show()

['个人编码', '一天去两家医院的天数', '月就诊医院数_AVG', '月就诊次数_MAX', '月统筹金额_MAX', '月药品金额_MAX', '医院_就诊天数_MAX', '医院_就诊天数_AVG', '医院_统筹金_MAX', '医院_统筹金_AVG', '医院_药品_MAX', '医院_药品_AVG', '医院编码_NN', '顺序号_NN', '交易时间DD_NN', '交易时间YYYYMM_NN', '药品费自费金额_SUM', '药品费申报金额_SUM', '贵重药品发生金额_SUM', '中成药费发生金额_SUM', '中草药费发生金额_SUM', '检查费自费金额_SUM', '检查费申报金额_SUM', '贵重检查费金额_SUM', '治疗费自费金额_SUM', '治疗费申报金额_SUM', '手术费自费金额_SUM', '手术费申报金额_SUM', '床位费申报金额_SUM', '医用材料发生金额_SUM', '高价材料发生金额_SUM', '医用材料费自费金额_SUM', '成分输血申报金额_SUM', '其它发生金额_SUM', '其它申报金额_SUM', '一次性医用材料申报金额_SUM', '起付线标准金额_MAX', '起付标准以上自负比例金额_SUM', '最高限额以上金额_SUM', '基本统筹基金支付金额_SUM', '公务员医疗补助基金支付金额_SUM', '基本个人账户支付_SUM', '非账户支付金额_SUM', '本次审批金额_SUM', '医疗救助医院申请_SUM', '残疾军人补助_SUM', '民政救助补助_SUM', '城乡优抚补助_SUM', '出院诊断病种名称_NN', '出院诊断LENTH_MAX', '药品在总金额中的占比', '个人支付的药品占比', '检查总费用在总金额占比', '个人支付检查费用占比', '治疗费用在总金额占比', '个人支付治疗费用占比', 'BZ_民政救助', 'BZ_城乡优抚', '是否挂号', 'RES']
+---+--------------------+----------------+--------------+--------------+--------------+-----------------+-----------------+---------

In [21]:
vec_assembler=VectorAssembler(inputCols=feature_cols,outputCol="features")
df_input = vec_assembler.transform(df)
classifier=SparkXGBClassifier(
    features_col="features",
    label_col=label_col,
    num_workers=3,
)

In [22]:
model = classifier.fit(df_input)
model.get_booster().feature_names = feature_cols

2024-01-23 18:05:35,351 INFO XGBoost-PySpark: _fit Running xgboost-2.0.3 on 3 workers with
	booster params: {'objective': 'binary:logistic', 'device': 'cpu', 'nthread': 1}
	train_call_kwargs_params: {'verbose_eval': True, 'num_boost_round': 100}
	dmatrix_kwargs: {'nthread': 1, 'missing': nan}
2024-01-23 18:05:48,783 INFO XGBoost-PySpark: _fit Finished xgboost training!   


In [23]:
pre_df = model.transform(df_input)
pre_df.show(50,False)

+---+--------------------+----------------+--------------+--------------+--------------+-----------------+-----------------+---------------+---------------+-------------+-------------+-----------+---------+-------------+-----------------+------------------+------------------+--------------------+--------------------+--------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+------------------+--------------------+--------------------+----------------------+--------------------+----------------+----------------+--------------------------+------------------+----------------------------+--------------------+------------------------+------------------------------+--------------------+------------------+----------------+--------------------+----------------+----------------+----------------+-------------------+-----------------+--------------------+------------------+----------------------+-------

                                                                                

In [24]:
model.get_feature_importances(importance_type="gain")

{'一天去两家医院的天数': 1.8250893354415894,
 '月就诊医院数_AVG': 1.4884222745895386,
 '月就诊次数_MAX': 9.308785438537598,
 '月统筹金额_MAX': 14.847107887268066,
 '月药品金额_MAX': 4.374019145965576,
 '医院_就诊天数_MAX': 2.177415132522583,
 '医院_就诊天数_AVG': 2.0453271865844727,
 '医院_统筹金_MAX': 2.0454723834991455,
 '医院_统筹金_AVG': 2.212063789367676,
 '医院_药品_MAX': 2.506988048553467,
 '医院_药品_AVG': 2.5995306968688965,
 '医院编码_NN': 0.6683475971221924,
 '顺序号_NN': 2.8423593044281006,
 '交易时间DD_NN': 3.8251492977142334,
 '交易时间YYYYMM_NN': 4.3345208168029785,
 '药品费自费金额_SUM': 2.2568447589874268,
 '药品费申报金额_SUM': 3.085120916366577,
 '贵重药品发生金额_SUM': 1.6935410499572754,
 '中成药费发生金额_SUM': 1.9440511465072632,
 '中草药费发生金额_SUM': 3.186933994293213,
 '检查费自费金额_SUM': 1.1030237674713135,
 '检查费申报金额_SUM': 2.1851398944854736,
 '贵重检查费金额_SUM': 1.6448674201965332,
 '治疗费自费金额_SUM': 1.5896481275558472,
 '治疗费申报金额_SUM': 2.032252311706543,
 '手术费申报金额_SUM': 1.2824745178222656,
 '床位费申报金额_SUM': 1.9254103899002075,
 '医用材料发生金额_SUM': 2.0970914363861084,
 '医用材料费自费金额_SUM': 1

In [58]:
features_importance = model.get_feature_importances(importance_type="total_gain")
features_importance = sorted(
    features_importance.items(),
    key=lambda x:x[1],
    reverse=True
)
features_importance = dict(features_importance)

In [59]:
features_importance

{'月统筹金额_MAX': 2761.56201171875,
 '月就诊次数_MAX': 781.93798828125,
 '基本统筹基金支付金额_SUM': 703.4177856445312,
 '月药品金额_MAX': 511.7602233886719,
 '本次审批金额_SUM': 414.4684753417969,
 '交易时间DD_NN': 390.16522216796875,
 '中草药费发生金额_SUM': 318.6933898925781,
 '顺序号_NN': 289.920654296875,
 '非账户支付金额_SUM': 271.1092529296875,
 '中成药费发生金额_SUM': 239.1182861328125,
 '药品在总金额中的占比': 237.83786010742188,
 '出院诊断LENTH_MAX': 222.36611938476562,
 '药品费自费金额_SUM': 200.8591766357422,
 '治疗费申报金额_SUM': 197.12847900390625,
 '医院_药品_AVG': 192.3652801513672,
 '医院_就诊天数_AVG': 192.26075744628906,
 '一次性医用材料申报金额_SUM': 189.69216918945312,
 '检查费申报金额_SUM': 185.7368927001953,
 '医院_统筹金_AVG': 183.60130310058594,
 '贵重药品发生金额_SUM': 182.90243530273438,
 '起付标准以上自负比例金额_SUM': 179.7655029296875,
 '医用材料发生金额_SUM': 167.76731872558594,
 '医院_就诊天数_MAX': 158.95130920410156,
 '药品费申报金额_SUM': 157.34117126464844,
 '治疗费用在总金额占比': 157.04220581054688,
 '医院_药品_MAX': 150.41928100585938,
 '检查总费用在总金额占比': 140.94390869140625,
 '基本个人账户支付_SUM': 139.69668579101562,
 '医院_统筹金_MA

In [60]:

from pyspark.ml.evaluation import BinaryClassificationEvaluator
kept = []
i=1
rem_dict = {}
for feature in features_importance.keys():
    kept.append(feature)
    df_kept = df.select(*kept,label_col)
    trainDF,testDF = df_kept.randomSplit([0.8,0.2],seed=14)
    kept_assembler = VectorAssembler(inputCols=kept,outputCol="feature_subset")
    classifierInFor = SparkXGBClassifier(
        max_depth = 5,
        missing = 0,
        features_col="feature_subset",
        label_col=label_col,
        num_workers=3,
        raw_prediction_col="raw_prediction"
    )
    with_selected_feature = kept_assembler.transform(trainDF)
    test_data = kept_assembler.transform(testDF)
    xgb_model = classifierInFor.fit(with_selected_feature)
    pre_xgb_df = xgb_model.transform(test_data)
    evaluator_model = BinaryClassificationEvaluator(
        rawPredictionCol="raw_prediction",
        labelCol=label_col,
        metricName="areaUnderROC"
    )
    eva_val = evaluator_model.evaluate(pre_xgb_df)
    rem_dict[i] = eva_val
    i=i+1

2024-01-23 21:27:45,847 INFO XGBoost-PySpark: _fit Running xgboost-2.0.3 on 3 workers with
	booster params: {'objective': 'binary:logistic', 'device': 'cpu', 'max_depth': 5, 'nthread': 1}
	train_call_kwargs_params: {'verbose_eval': True, 'num_boost_round': 100}
	dmatrix_kwargs: {'nthread': 1, 'missing': 0.0}
2024-01-23 21:27:50,518 INFO XGBoost-PySpark: _fit Finished xgboost training!   
2024-01-23 21:27:51,448 INFO XGBoost-PySpark: _fit Running xgboost-2.0.3 on 3 workers with
	booster params: {'objective': 'binary:logistic', 'device': 'cpu', 'max_depth': 5, 'nthread': 1}
	train_call_kwargs_params: {'verbose_eval': True, 'num_boost_round': 100}
	dmatrix_kwargs: {'nthread': 1, 'missing': 0.0}
2024-01-23 21:27:55,423 INFO XGBoost-PySpark: _fit Finished xgboost training!   
2024-01-23 21:27:56,431 INFO XGBoost-PySpark: _fit Running xgboost-2.0.3 on 3 workers with
	booster params: {'objective': 'binary:logistic', 'device': 'cpu', 'max_depth': 5, 'nthread': 1}
	train_call_kwargs_params: {'v

In [61]:
rem_dict

{1: 0.7935234933562302,
 2: 0.820094107582077,
 3: 0.9065800449149825,
 4: 0.9002063950379668,
 5: 0.9120692974013482,
 6: 0.9158699604320408,
 7: 0.9249353010373242,
 8: 0.9232787937119049,
 9: 0.9311923858410878,
 10: 0.9208095390867314,
 11: 0.9234456207892212,
 12: 0.9146187573521553,
 13: 0.9158763768580931,
 14: 0.9206844187787415,
 15: 0.9156325526681646,
 16: 0.926544754571704,
 17: 0.9208213025344889,
 18: 0.9162528071864,
 19: 0.9295155598331756,
 20: 0.9183360068441854,
 21: 0.9210865148112566,
 22: 0.9248508180943245,
 23: 0.9204063736498784,
 24: 0.9241450112287467,
 25: 0.9260592450005358,
 26: 0.9280269489894132,
 27: 0.91366057106192,
 28: 0.9173799593626363,
 29: 0.922470324029517,
 30: 0.922495989733719,
 31: 0.9199828895305325,
 32: 0.915681745267891,
 33: 0.9157309378676105,
 34: 0.915822906641005,
 35: 0.9225772644636936,
 36: 0.9185691369906971,
 37: 0.9236509464228397,
 38: 0.9193883007165013,
 39: 0.9244957758528501,
 40: 0.9166570420275891,
 41: 0.9129055715966