In [41]:
import pandas as pd

In [42]:
# Load the input table from IPL2.csv (path is relative to the working directory).
raw_data = pd.read_csv('IPL2.csv')
# Preview the first rows of the loaded frame.
raw_data.head()

Unnamed: 0,stock,group,free_float,instrument_volumn,close_price
0,FPT,1,0.85,1269968875,92800
1,HPG,5,0.55,5814785700,26300
2,VNM,5,0.4,2089955445,74200
3,MWG,5,0.75,1462560047,52600
4,MSN,1,0.45,1423724783,76300


In [43]:
# Show the column labels of the loaded frame.
raw_data.columns

Index(['stock', 'group', 'free_float', 'instrument_volumn', 'close_price'], dtype='object')

In [39]:
# Integer view of the capital (free_float * instrument_volumn * close_price).
# The product is computed from the source columns instead of reading
# raw_data['capital'], because that column is only created by a later cell and
# would raise KeyError when the notebook is run top to bottom on a fresh kernel.
# round() keeps float products that land just below a whole number from being
# truncated down by one, and 'int64' is spelled out because plain `int` can map
# to a 32-bit integer on some platforms, which cannot hold values around 1e14.
raw_data['test_capital'] = (
    raw_data['free_float'] * raw_data['instrument_volumn'] * raw_data['close_price']
).round().astype('int64')
raw_data.head()

Unnamed: 0,stock,group,free_float,instrument_volumn,close_price,capital,raw_weight,adjust_weight1,adjust_weight2,test_capital
0,FPT,1,0.85,1269968875,92800,100175100000000.0,0.17064,0.1,0.098564,100175144860000
1,HPG,5,0.55,5814785700,26300,84110880000000.0,0.143276,0.1,0.090425,84110875150500
2,VNM,5,0.4,2089955445,74200,62029880000000.0,0.105663,0.1,0.090425,62029877607600
3,MWG,5,0.75,1462560047,52600,57697990000000.0,0.098284,0.1,0.090425,57697993854150
4,MSN,1,0.45,1423724783,76300,48883590000000.0,0.083269,0.1,0.098564,48883590424305


In [44]:
# capital = free-float fraction * instrument volume * close price (kept as float).
raw_data['capital'] = raw_data['free_float'] * raw_data['instrument_volumn'] * raw_data['close_price']
# The former bare `raw_data['capital'].astype(int)` statement was dropped: its
# result was never assigned or displayed, so it had no effect on the frame.
# raw_weight = each row's share of the total capital.
raw_data['raw_weight'] = raw_data['capital'] / raw_data['capital'].sum()
raw_data.head()

Unnamed: 0,stock,group,free_float,instrument_volumn,close_price,capital,raw_weight
0,FPT,1,0.85,1269968875,92800,100175100000000.0,0.17064
1,HPG,5,0.55,5814785700,26300,84110880000000.0,0.143276
2,VNM,5,0.4,2089955445,74200,62029880000000.0,0.105663
3,MWG,5,0.75,1462560047,52600,57697990000000.0,0.098284
4,MSN,1,0.45,1423724783,76300,48883590000000.0,0.083269


In [17]:
from typing import List


def getGroupWeight(groups, weights):
    """Total the weights per group label.

    Args:
        groups: Sequence of group labels; ``groups[i]`` labels ``weights[i]``.
        weights: Sequence of numeric weights.

    Returns:
        dict mapping each group label (in order of first appearance) to the
        sum of the weights carrying that label.
    """
    totals = {}
    for idx, value in enumerate(weights):
        label = groups[idx]
        # A label seen for the first time starts from 0.
        totals[label] = totals.get(label, 0) + value
    return totals


def balanceWeights(groups: List[int], weights: List[float], visited: set,
                   cap: float = 0.1):
    """Run one capping pass over ``weights``, modifying the list in place.

    Entries whose group is in ``visited`` are left untouched. Every other
    weight at or above ``cap`` is set to ``cap``; the total amount cut off is
    then spread over the remaining below-cap weights in proportion to their
    current size. A redistributed weight may itself end up above ``cap``, so
    callers repeat the pass until the returned value is 0.

    Args:
        groups: Group label of each entry; ``groups[i]`` belongs to ``weights[i]``.
        weights: Weights to adjust (mutated in place).
        visited: Group labels whose entries must be skipped.
        cap: Maximum weight allowed for a single entry (default 0.1).

    Returns:
        The total excess removed from capped weights in this pass; 0 when no
        weight exceeded ``cap``.
    """
    residual = 0
    remainSum = 0
    for i in range(len(weights)):
        if groups[i] in visited:
            continue
        if weights[i] >= cap:
            residual += weights[i] - cap
            weights[i] = cap
        else:
            remainSum += weights[i]
    # Nothing below the cap can absorb the excess (no such entries, or they
    # sum to zero); skip redistribution instead of dividing by zero.
    if remainSum == 0:
        return residual
    for i in range(len(weights)):
        if groups[i] in visited:
            continue
        if weights[i] < cap:
            weights[i] = weights[i] * \
                (residual+remainSum) / remainSum
    return residual


def adjustWeights(groups: List[int], weights: List[float], visited: set):
    """Repeat ``balanceWeights`` on ``weights`` until a pass returns 0.

    ``weights`` is modified in place by each pass; the same list object is
    returned for convenience.
    """
    leftover = balanceWeights(groups, weights, visited)
    while leftover != 0:
        leftover = balanceWeights(groups, weights, visited)
    return weights


def balanceGroups(groups: List[int], weights: List[float], visited: set):
    """Run one group-capping pass over ``weights`` (mutated in place).

    Steps, in order:
      1. Re-run ``adjustWeights`` on the entries whose group is not in ``visited``.
      2. Total the weights per group with ``getGroupWeight``.
      3. Every not-yet-visited group whose total is >= 0.3 contributes its
         excess over 0.3 to ``residual`` and is added to ``visited``
         (``visited`` is mutated).
      4. Entries of visited groups are rescaled by ``0.3 / group total``; the
         other entries that are below 0.1 are scaled up by
         ``(residual + remainSum) / remainSum``.

    Args:
        groups: Group label of each entry; ``groups[i]`` belongs to ``weights[i]``.
        weights: Weights to adjust.
        visited: Labels of groups already capped; extended by this call.

    Returns:
        The total excess over 0.3 found in groups capped during this call
        (0 when no new group reached 0.3).
    """
    adjustWeights(groups, weights, visited)
    gWeight = getGroupWeight(groups, weights)
    residual = 0
    remainSum = 0
    for g in gWeight:
        # Groups capped by an earlier call are not counted again.
        if g in visited:
            continue
        if gWeight[g] >= 0.3:
            residual += gWeight[g] - 0.3
            visited.add(g)
        else:
            remainSum += gWeight[g]
    # Entries sitting exactly at 0.1 are excluded from the scaling below
    # (it only touches weights < 0.1), so take them out of the pool total.
    for i in range(len(weights)):
        if groups[i] in visited:
            continue
        if weights[i] == 0.1:
            remainSum -= 0.1

    for i in range(len(weights)):
        if groups[i] in visited:
            # Applies to every visited group, including ones added by earlier
            # calls; the factor uses the totals computed above in this call.
            weights[i] *= 0.3 / gWeight[groups[i]]
            continue
        if gWeight[groups[i]] < 0.3 and weights[i] < 0.1:
            # NOTE(review): divides by remainSum; if that is 0 while such an
            # entry exists (e.g. a zero weight) this would raise
            # ZeroDivisionError for plain Python floats - confirm inputs.
            weights[i] = weights[i] * \
                (residual+remainSum) / remainSum
    return residual


def adjustGroups(groups: List[int], weights: List[float]) -> List[float]:
    """Repeat ``balanceGroups`` until a pass returns 0 or every group is capped.

    Args:
        groups: Group label of each entry; ``groups[i]`` belongs to ``weights[i]``.
        weights: Weights to adjust; modified in place by ``balanceGroups``.

    Returns:
        An empty list when fewer than 10 weights are supplied (nothing is
        modified in that case); otherwise the same ``weights`` list after
        adjustment.
    """
    if len(weights) < 10:
        return []
    group_count = len(getGroupWeight(groups, weights))
    capped = set()
    # Stop once every group has been capped or a pass finds no group excess.
    while len(capped) < group_count and balanceGroups(groups, weights, capped) != 0:
        pass
    return weights

In [38]:
# Write the current frame to result2.csv (pandas also writes the index as the first column by default).
# NOTE(review): in file order this cell sits before the cell that adds
# adjust_weight1/adjust_weight2, so a top-to-bottom run saves the frame without
# those columns - confirm the intended position of this cell.
raw_data.to_csv('result2.csv')

In [45]:
weights = raw_data['raw_weight'].to_list()
groups = raw_data['group'].to_list()
# adjustWeights and adjustGroups modify the list they are given, so each call
# gets its own copy of the raw weights; neither column then depends on side
# effects of the other call. `visited` is declared as a set, so pass set()
# rather than the dict literal {}.
raw_data['adjust_weight1'] = pd.Series(adjustWeights(groups, list(weights), set()))
# adjustGroups applies the per-entry cap itself before capping groups, so it
# can start from the raw weights as well.
raw_data['adjust_weight2'] = pd.Series(adjustGroups(groups, list(weights)))
raw_data

Unnamed: 0,stock,group,free_float,instrument_volumn,close_price,capital,raw_weight,adjust_weight1,adjust_weight2
0,FPT,1,0.85,1269968875,92800,100175100000000.0,0.17064,0.1,0.098564
1,HPG,5,0.55,5814785700,26300,84110880000000.0,0.143276,0.1,0.090425
2,VNM,5,0.4,2089955445,74200,62029880000000.0,0.105663,0.1,0.090425
3,MWG,5,0.75,1462560047,52600,57697990000000.0,0.098284,0.1,0.090425
4,MSN,1,0.45,1423724783,76300,48883590000000.0,0.083269,0.1,0.098564
5,PNJ,4,0.85,650000000,78800,43537000000000.0,0.074162,0.092965,0.1
6,GMD,1,0.9,301377957,66300,17983220000000.0,0.030633,0.0384,0.040416
7,GEX,2,0.7,851495793,20800,12397780000000.0,0.021119,0.026473,0.028617
8,DIG,4,0.8,609851995,25100,12245830000000.0,0.02086,0.026149,0.028266
9,HSG,3,0.85,615982309,20000,10471700000000.0,0.017838,0.02236,0.024171
