# Exciton Decomposition Analysis

There are qualitative and quantitative analyses. For a qualitative analysis you can use summarize eigenvectors: it tells you the largest band-to-band transition and the k-point with the largest weight, from which you can learn how the exciton decomposes into band contributions.

ploteigenvectors.py is also a qualitative analysis, and summarize eigenvectors is only semi-qualitative because of the limited information it provides:

It only reports wtot, wmax and ikmax for the different band-to-band transitions of each exciton S. The k-point dependence cannot be resolved from this information, so the exciton cannot be safely projected onto atomic orbitals.

`wtot = sum_k |A_vck|^2. wmax = max_k |A_vck|^2. |A_vc (ikmax)|^2 = wmax.`


**To do a proper quantitative approach:**

Read eigenvectors.h5 to extract the k-resolved decomposition into the different band-to-band transitions. In combination with the PDOS information, I can calculate the perovskite/non-perovskite percentage of the electrons and holes for every exciton. Similar information is also calculated for the ECF-noshift plot, where I can see the localization of the electrons and holes of each exciton in real space.

In [None]:
file:///Users/yinanchen/PhD/2022_Intergrowth_Hema/APD1_Na/absorption/k104/evec_component_k=0.png
file:///Users/yinanchen/PhD/2022_Intergrowth_Hema/APD1_Na/absorption/k104/evec_component_k=-1.png
file:///Users/yinanchen/PhD/2022_Intergrowth_Hema/APD2_Pb/corrected_H/absorption/evec_component_k=-1.png

In [69]:
import pandas as pd
import numpy as np

In [70]:
# Things to modify by hand
#
# Each setting below is assigned more than once. Python keeps only the LAST
# assignment of a name, so the earlier lines are inactive alternatives kept for
# reference; the last line of each group selects the input actually used.

# k-grid log whose content is parsed by process_kgrid_WFN
kgrid_file = '10104.log'
kgrid_file = 'kgrid.log_cocn.bak'

# band-structure table read by read_unfolded
bandstructure_file = 'bandstructure.dat'
bandstructure_file = 'bandstructure.dat_cocn.bak'

# Row offset used to split each k-point group of the projbands table: rows
# [fermi - nv, fermi) are taken as valence bands and rows [fermi, fermi + nc)
# as conduction bands. Hard-coded per system.
fermi = 408 # ! VBM - start + 1 , hard-coded for APD2_Li now, read from projbands file
fermi = 280 # ! for HOCN
fermi = 304   # ! for COCN

# HDF5 file opened by plot_for_k (exciton eigenvalues and eigenvectors)
eigenvector_file = 'eigenvectors.h5'
eigenvector_file = 'eigenvectors.h5_hocn.bak'
eigenvector_file = 'eigenvectors.h5_cocn.bak'

# output file scanned by extract_specific_data for alat and the reciprocal axes
scf_file = 'scf.out'
scf_file = 'relax.out'
scf_file = 'relax.out_cocn.bak'

# k-resolved projected-band table loaded into the DataFrame `df`
projbands_file = 'cl.projbands'
projbands_file = 'cl.projbands_hocn.bak'
projbands_file = 'cl.projbands_cocn.bak'

# Exciton states are kept only if e_min < eigenvalue < e_high (strict on both
# sides); units are those of the eigenvalues stored in the eigenvector file
# (presumably eV -- TODO confirm).
e_min = 0
e_high = 5

## Step 0: Find the correspondence between the k list from bandstructure.dat (unfolded) and the k list from kgrid.log (folded)

### 0.1 Process K list from Kgrid.log (folded)

In [104]:
## Process kgrid.log: find the correspondence between the uniform grid and the
## irreducible grid used to calculate WFN_fi

def process_kgrid_WFN(file_content):
    """Parse the uniform-grid table of a kgrid log and number its irreducible points.

    The table is located by the line containing
    "k-points in the original uniform grid"; the following line holds the number
    of rows and the rows themselves start one line later. Each row is split on
    whitespace and parsed as: an integer row number, four floats (the calling
    cell uses the first three as kx, ky, kz), an integer reference row and one
    trailing string column.

    A reference of 0 marks a new irreducible point, which receives the next
    0-based counter value. A non-zero reference makes the row inherit the
    counter value of the row it points to.

    Parameters
    ----------
    file_content : str
        Full text of the kgrid log.

    Returns
    -------
    list of list
        One entry per table row: the seven parsed columns followed by the
        0-based irreducible-point index.

    Raises
    ------
    ValueError
        If the table header line is not present in ``file_content``.
    KeyError
        If a row references a row number that has not been seen yet.
    """
    marker = "k-points in the original uniform grid"
    lines = file_content.split('\n')

    # Locate the first header line. Without this guard a missing header would
    # only surface later as an unbound local variable.
    header_at = next((i for i, text in enumerate(lines) if marker in text), None)
    if header_at is None:
        raise ValueError(f"table header {marker!r} not found in kgrid log content")

    num_rows = int(lines[header_at + 1].strip())
    start_line = header_at + 2

    irreducible_index = {}   # row number -> 0-based irreducible-point index
    next_index = 0
    processed_data = []

    for text in lines[start_line:start_line + num_rows]:
        columns = text.split()
        row = [int(columns[0])] + [float(c) for c in columns[1:5]] + [int(columns[5]), columns[6]]
        row_number, reference_row = row[0], row[5]

        if reference_row == 0:
            # New irreducible point: hand out the next counter value.
            irreducible_index[row_number] = next_index
            next_index += 1
        else:
            # Folded point: reuse the index of the row it is equivalent to.
            irreducible_index[row_number] = irreducible_index[reference_row]
        processed_data.append(row + [irreducible_index[row_number]])

    return processed_data

# Read the kgrid log through a context manager so the file handle is closed
# as soon as its text has been read.
with open(kgrid_file) as kgrid_handle:
    processed_data = process_kgrid_WFN(kgrid_handle.read())
# Keep kx, ky, kz (columns 1-3) and the irreducible-point index appended by
# process_kgrid_WFN (column 7), all converted to float.
processed_data = np.array(processed_data)[:, [1, 2, 3, 7]].astype(float)
# Number of rows in the uniform-grid table.
nk = processed_data.shape[0]
processed_data

array([[ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.08333333,  1.        ],
       [ 0.        ,  0.        ,  0.16666667,  2.        ],
       ...,
       [ 0.75      ,  0.91666667,  0.75      , 64.        ],
       [ 0.75      ,  0.91666667,  0.83333333, 63.        ],
       [ 0.75      ,  0.91666667,  0.91666667, 62.        ]])

### 0.2 Process bandstructure.dat (unfolded)

In [72]:
def read_unfolded(file_path):
    """Return the k-points listed for the first (spin, band) pair of a band file.

    Lines starting with '#' and lines with fewer than five whitespace-separated
    fields are ignored. The first five fields of a data line are read as
    spin, band, kx, ky, kz. Reading stops at the first data line whose spin or
    band differs from the first data line, so each k-point is collected once.

    Parameters
    ----------
    file_path : str
        Path of the band-structure text file.

    Returns
    -------
    numpy.ndarray
        Array with one [kx, ky, kz] row per collected line, in file order.
    """
    kpoints = []
    first_key = None   # (spin, band) strings of the first data line

    with open(file_path, 'r') as handle:
        for raw in handle:
            if raw.startswith('#'):
                continue
            fields = raw.split()
            if len(fields) < 5:
                continue

            key = (fields[0], fields[1])
            if first_key is None:
                first_key = key
            elif key != first_key:
                # A new spin/band starts: the k list would only repeat.
                break

            kpoints.append([float(value) for value in fields[2:5]])

    return np.array(kpoints)

# k-points of the first (spin, band) entry of the band-structure file, one
# [kx, ky, kz] row per k-point, in file order.
data_matrix = read_unfolded(bandstructure_file)
print(data_matrix.shape)


(576, 3)


In [73]:

# alat and the reciprocal lattice vectors are read from relax.out or scf.out

def extract_specific_data(file_path):
    """Scan an output file for the lattice parameter and the reciprocal axes.

    The lattice parameter is the first whitespace-separated token after '=' on a
    line containing 'lattice parameter (alat)'. After a line containing
    'reciprocal axes', the next three lines are consumed and fields 3, 4 and 5
    of each (whitespace split) are kept as one vector. Scanning stops once both
    have been found.

    Parameters
    ----------
    file_path : str
        Path of the text output file.

    Returns
    -------
    tuple
        (alat, b_vectors): alat is a float, or None if never found; b_vectors
        is a float numpy array built from the collected vectors.
    """
    alat = None
    b_vectors = []

    with open(file_path, 'r') as handle:
        for text in handle:
            if 'lattice parameter (alat)' in text:
                pieces = text.split('=')
                if len(pieces) >= 2:
                    alat = float(pieces[1].split()[0])

            if 'reciprocal axes' in text:
                # ! Heavily depends on the format of the file: the three
                # vectors must follow immediately, one per line.
                b_vectors += [handle.readline().split()[3:6] for _ in range(3)]

            if alat is not None and len(b_vectors) == 3:
                break

    return alat, np.array(b_vectors, dtype=float)

# Lattice parameter and the three reciprocal axes parsed from the SCF/relax output.
alat, b_vectors = extract_specific_data(scf_file)
print(alat)
print(b_vectors)
# Scale the parsed axes by 2*pi/alat; rows of the result are the reciprocal
# lattice vectors used for the Cartesian <-> fractional conversions.
# NOTE(review): presumably the file lists the axes in units of 2*pi/alat with
# alat in bohr, giving vectors in 1/bohr -- confirm against the output header.
two_pi_over_alat = 2 * np.pi / alat
reciprocal_lattice_vectors = np.array(b_vectors) * two_pi_over_alat
reciprocal_lattice_vectors

34.7991
[[ 1.001424 -0.        0.250129]
 [-0.        2.092308 -0.      ]
 [ 0.012937 -0.        2.298764]]


array([[ 0.18081308, -0.        ,  0.04516228],
       [-0.        ,  0.3777787 , -0.        ],
       [ 0.00233585, -0.        ,  0.41505557]])

In [74]:
# Convert Cartesian coordinates to fractional coordinates
def cartesian_to_fractional(cartesian_coordinates, reciprocal_lattice_vectors):
    """Express row-vector Cartesian k-points in the basis of the reciprocal lattice.

    ``reciprocal_lattice_vectors`` holds one basis vector per row, so the
    fractional coordinates are the Cartesian rows multiplied by its inverse.
    """
    inverse_basis = np.linalg.inv(reciprocal_lattice_vectors)
    fractional = np.dot(cartesian_coordinates, inverse_basis)
    return fractional

def fractional_to_cartesian(fractional_coordinates, reciprocal_lattice_vectors):
    """Map row-vector fractional k-points to Cartesian coordinates.

    ``reciprocal_lattice_vectors`` holds one basis vector per row; each output
    row is the corresponding linear combination of those basis vectors.
    """
    cartesian = np.dot(fractional_coordinates, reciprocal_lattice_vectors)
    return cartesian

# Fractional coordinates of the unfolded k-points from the band-structure file.
fractional_coordinates = cartesian_to_fractional(data_matrix, reciprocal_lattice_vectors)
# Round to 3 decimals so that numerically equal grid values compare equal in
# the lexicographic sort performed later.
fractional_coordinates = np.around(fractional_coordinates, decimals=3)
fractional_coordinates

array([[ 0.   ,  0.   ,  0.   ],
       [-0.   ,  0.   ,  0.083],
       [ 0.   ,  0.   , -0.083],
       ...,
       [-0.5  ,  0.5  ,  0.417],
       [ 0.5  ,  0.5  , -0.417],
       [-0.5  ,  0.5  ,  0.5  ]])

In [75]:
# Translate fractional coordinates by whole lattice vectors so that every
# component lies in [0, 1)
def translate_to_BZ(fractional_coordinates):
    """Return the fractional part of each coordinate (input minus its floor)."""
    whole_cells = np.floor(fractional_coordinates)
    return fractional_coordinates - whole_cells

# Unfolded k-points with every fractional component shifted into [0, 1);
# the slice only previews the first 20 rows.
translated_coordinates = translate_to_BZ(fractional_coordinates)
translated_coordinates[:20]

array([[0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.083],
       [0.   , 0.   , 0.917],
       [0.   , 0.   , 0.167],
       [0.   , 0.   , 0.833],
       [0.   , 0.   , 0.25 ],
       [0.   , 0.   , 0.75 ],
       [0.   , 0.   , 0.333],
       [0.   , 0.   , 0.667],
       [0.   , 0.   , 0.417],
       [0.   , 0.   , 0.583],
       [0.   , 0.   , 0.5  ],
       [0.   , 0.083, 0.   ],
       [0.   , 0.917, 0.   ],
       [0.   , 0.083, 0.083],
       [0.   , 0.083, 0.917],
       [0.   , 0.917, 0.917],
       [0.   , 0.917, 0.083],
       [0.   , 0.083, 0.167],
       [0.   , 0.083, 0.833]])

In [76]:
# Order the translated coordinates by kx, then ky, then kz, and also return
# where each sorted row came from
def sort_by_k(translated_coordinates):
    """Sort k-points lexicographically by (kx, ky, kz).

    Returns
    -------
    tuple
        (sorted_coordinates, indices), where ``indices[i]`` is the row of the
        input that ends up at position ``i`` of the sorted array.
    """
    kx, ky, kz = (translated_coordinates[:, axis] for axis in range(3))
    # np.lexsort treats the LAST key as the primary one.
    order = np.lexsort((kz, ky, kx))
    return translated_coordinates[order], order

# `indices[i]` is the row of the band-structure k list that lands at position i
# once the translated k-points are sorted by (kx, ky, kz).
sorted_coordinates, indices = sort_by_k(translated_coordinates)
print(sorted_coordinates[:20])
print(indices[:20])

[[0.    0.    0.   ]
 [0.    0.    0.083]
 [0.    0.    0.167]
 [0.    0.    0.25 ]
 [0.    0.    0.333]
 [0.    0.    0.417]
 [0.    0.    0.5  ]
 [0.    0.    0.583]
 [0.    0.    0.667]
 [0.    0.    0.75 ]
 [0.    0.    0.833]
 [0.    0.    0.917]
 [0.    0.083 0.   ]
 [0.    0.083 0.083]
 [0.    0.083 0.167]
 [0.    0.083 0.25 ]
 [0.    0.083 0.333]
 [0.    0.083 0.417]
 [0.    0.083 0.5  ]
 [0.    0.083 0.583]]
[ 0  1  3  5  7  9 11 10  8  6  4  2 12 14 18 22 26 30 34 31]


In [77]:
# Append the sort indices to processed_data. Resulting columns:
#   0-2: kx, ky, kz from the kgrid log
#   3:   irreducible-point (folded) index from process_kgrid_WFN
#   4:   row of the same k-point in the band-structure (unfolded) k list
# Row i of the kgrid table is paired with the i-th k-point of the sorted
# unfolded list, which is only correct if the kgrid table is itself listed in
# ascending (kx, ky, kz) order.
processed_kgrid_wfn_eigenvector = np.hstack((processed_data, indices.reshape(-1,1)))
processed_kgrid_wfn_eigenvector[:20]

array([[ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.08333333,  1.        ,  1.        ],
       [ 0.        ,  0.        ,  0.16666667,  2.        ,  3.        ],
       [ 0.        ,  0.        ,  0.25      ,  3.        ,  5.        ],
       [ 0.        ,  0.        ,  0.33333333,  4.        ,  7.        ],
       [ 0.        ,  0.        ,  0.41666667,  5.        ,  9.        ],
       [ 0.        ,  0.        ,  0.5       ,  6.        , 11.        ],
       [ 0.        ,  0.        ,  0.58333333,  5.        , 10.        ],
       [ 0.        ,  0.        ,  0.66666667,  4.        ,  8.        ],
       [ 0.        ,  0.        ,  0.75      ,  3.        ,  6.        ],
       [ 0.        ,  0.        ,  0.83333333,  2.        ,  4.        ],
       [ 0.        ,  0.        ,  0.91666667,  1.        ,  2.        ],
       [ 0.        ,  0.08333333,  0.        ,  7.        , 12.        ],
       [ 0.        ,  0.08333333,  0.0

In [78]:
# Check both index columns: column 3 is the folded index and column 4 is the
# unfolded index. The prints below show, in this order: the largest unfolded
# index (column 4), the largest folded index (column 3), and the number of rows.
print(max(processed_kgrid_wfn_eigenvector[:,4]))
print(max(processed_kgrid_wfn_eigenvector[:,3]))
print(len(processed_kgrid_wfn_eigenvector))

575.0
181.0
576


## Step 1: Decomposition of excitons into the contribution of band-to-band transitions at different k points

Specifically, for each (S, k) pair I read and store $\sum_v |A^S_{cvk}|^2$ for each conduction band c and $\sum_c |A^S_{cvk}|^2$ for each valence band v. I can then also plot the distribution of the contribution of each band-to-band transition to the exciton wavefunction, as I did using the script plot_eigenvectors_kloop.py.

file:///Users/yinanchen/PhD/2022_Intergrowth_Hema/APD1_Na/absorption/k104/evec_component_k=0.png
file:///Users/yinanchen/PhD/2022_Intergrowth_Hema/APD1_Na/absorption/k104/evec_component_k=-1.png

In [79]:
# Load the projbands table as raw text lines.
with open(projbands_file, 'r') as f:
    lines = f.readlines()

# Data rows: every line of the file that does not start with '#'.
data = [text for text in lines if not text.startswith('#')]

# Header: only the unbroken run of '#' lines at the top of the file.
comments = []
for line in lines:
    if not line.startswith('#'):
        break
    comments.append(line)


In [80]:
# Split every data line on whitespace: list of strings -> list of lists of
# string tokens (a 2D table).
data = [line.split() for line in data]

In [81]:
# Convert the 2D token table to a DataFrame and cast every entry from string
# to float. Columns keep their default integer labels 0, 1, 2, ...
df = pd.DataFrame(data).astype(float)

In [82]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,422,423,424,425,426,427,428,429,430,431
0,1.0,0.00000,-21.77144,0.937,0.0,0.0,0.0,0.0,0.0,0.0,...,0.003,0.003,0.016,0.016,0.004,0.002,0.002,0.004,0.003,0.003
1,1.0,0.00000,-21.77144,0.937,0.0,0.0,0.0,0.0,0.0,0.0,...,0.003,0.003,0.016,0.016,0.004,0.002,0.002,0.004,0.003,0.003
2,1.0,0.00000,-21.77114,0.937,0.0,0.0,0.0,0.0,0.0,0.0,...,0.003,0.003,0.016,0.016,0.004,0.002,0.002,0.004,0.003,0.003
3,1.0,0.00000,-21.77114,0.937,0.0,0.0,0.0,0.0,0.0,0.0,...,0.003,0.003,0.016,0.016,0.004,0.002,0.002,0.004,0.003,0.003
4,1.0,0.00000,-21.76953,0.944,0.0,0.0,0.0,0.0,0.0,0.0,...,0.003,0.003,0.016,0.016,0.004,0.002,0.002,0.004,0.003,0.003
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65515,182.0,61.31669,5.79931,0.616,0.0,0.0,0.0,0.0,0.0,0.0,...,0.003,0.003,0.003,0.003,0.004,0.004,0.004,0.004,0.003,0.003
65516,182.0,61.31669,5.92845,0.520,0.0,0.0,0.0,0.0,0.0,0.0,...,0.003,0.003,0.001,0.001,0.004,0.004,0.004,0.004,0.003,0.003
65517,182.0,61.31669,5.92845,0.520,0.0,0.0,0.0,0.0,0.0,0.0,...,0.003,0.003,0.001,0.001,0.004,0.004,0.004,0.004,0.003,0.003
65518,182.0,61.31669,5.94322,0.480,0.0,0.0,0.0,0.0,0.0,0.0,...,0.004,0.004,0.001,0.001,0.004,0.002,0.002,0.004,0.004,0.004


In [83]:
import numpy as np
import h5py as h5
import matplotlib.pyplot as plt
contrib_c_dict = {}
contrib_v_dict = {}

def plot_for_k(k_select):
    """Store per-band exciton weights for one k-point, or summed over all k-points.

    Despite its name this function draws nothing. It reads the exciton
    eigenvalues and eigenvectors from the module-level ``eigenvector_file`` and,
    for every exciton state whose eigenvalue lies strictly between the
    module-level ``e_min`` and ``e_high``, fills the module-level dictionaries:

    * ``contrib_c_dict[(state, k_select)]``: |A|^2 summed over the selected
      k-points and over valence bands, one entry per conduction band;
    * ``contrib_v_dict[(state, k_select)]``: |A|^2 summed over the selected
      k-points and over conduction bands, one entry per valence band.

    Summing over one band index discards the correlation between c and v. That
    is enough for weighting k-resolved projections band by band, but not for a
    correlated electron-hole quantity.

    Parameters
    ----------
    k_select : int
        Index along the k axis of the eigenvector array. The value -1 keeps
        every k-point, so the stored weights are summed over all of them.

    Returns
    -------
    tuple
        ``(nS, nk, nc, nv)`` taken from the eigenvector array shape before any
        k-point selection.
    """
    # The context manager closes the HDF5 file; `[()]` has already copied both
    # datasets into in-memory arrays, so they remain valid afterwards.
    with h5.File(eigenvector_file, 'r') as f:
        evals = f['exciton_data/eigenvalues'][()]
        evc = f['exciton_data/eigenvectors'][()]

    # Keep entry 0 of the first and sixth axes (presumably Q-point and spin --
    # TODO confirm against the file layout). The last axis holds the real and
    # imaginary parts.
    evc = evc[0, :, :, :, :, 0, :]
    (nS, nk, nc, nv, _) = evc.shape

    if k_select != -1:
        # List indexing keeps the k axis (length 1) so the sum below still works.
        evc = evc[:, [k_select], :, :, :]

    for iN_S in range(nS):
        if e_min < evals[iN_S] < e_high:
            amplitude = evc[iN_S, :, :, :, 0] + evc[iN_S, :, :, :, 1] * 1j
            # |A|^2 summed over the k axis -> array of shape (nc, nv)
            temp_contrib_cv = np.sum(abs(amplitude) ** 2, axis=0)
            temp_contrib_v = np.sum(temp_contrib_cv, axis=0)
            temp_contrib_c = np.sum(temp_contrib_cv, axis=1)

            contrib_c_dict[(iN_S, k_select)] = temp_contrib_c
            contrib_v_dict[(iN_S, k_select)] = temp_contrib_v
    return nS, nk, nc, nv

In [84]:
# Fill contrib_c_dict / contrib_v_dict for every k-point index.
# The range is built from the `nk` obtained from the kgrid table. Inside the
# loop `nk` is rebound to the k-axis length of the eigenvector array; this does
# not alter the already-created range, and the final print shows the
# eigenvector-file value.
for k in range(0,nk):
    nS, nk, nc, nv = plot_for_k(k)
    #print(contrib_c_dict)
    #print(contrib_v_dict)
print(nk)

576


In [85]:
contrib_c_dict

{(0,
  0): array([3.96604712e-02, 4.19372797e-02, 6.51726787e-10, 6.85448232e-10,
        2.17388757e-10, 2.10749955e-10, 2.62377979e-10, 2.60862891e-10,
        1.83264322e-08, 1.87596144e-08, 1.04052227e-08, 1.00715172e-08,
        1.45158863e-10, 1.46263489e-10, 3.82990905e-11, 3.87134564e-11,
        1.90062278e-09, 1.95412726e-09, 2.57483645e-10, 2.49008610e-10]),
 (1,
  0): array([5.20027139e-02, 2.96312071e-02, 3.43888178e-09, 2.82619570e-09,
        2.63004277e-10, 3.62840509e-10, 2.24461099e-10, 2.28393401e-10,
        1.75007181e-08, 1.84824011e-08, 7.95607672e-09, 1.24011754e-08,
        2.46952322e-10, 3.48887914e-10, 3.57990601e-11, 3.49211321e-11,
        2.02980184e-09, 2.24153038e-09, 2.13791449e-10, 2.19820237e-10]),
 (2,
  0): array([3.12902513e-02, 5.03520934e-02, 3.10456534e-10, 8.68985112e-10,
        4.21991256e-10, 3.53697055e-10, 1.74655566e-10, 1.25464166e-10,
        1.94151181e-08, 1.73778764e-08, 1.13637063e-08, 7.51617208e-09,
        1.94631254e-10, 9.1840

In [86]:
nS, nk, nc, nv

(10, 576, 20, 20)

In [87]:
# Find the corresponding pdos for each k from df.
# Count the distinct values of column 0 of df, i.e. the number of k-point
# groups in the projbands table. This is the number of folded (irreducible)
# k-points, so it should equal the largest folded index + 1 rather than the
# unfolded count nk.
n_k = len(df[0].unique())
n_k

182

## Step 2: Calculate the kpdos on a WFN_fi calculation

We now have the decomposition of the excitons into band-to-band transition contributions for each (S, k) pair.

For every k, find the corresponding pdos from K-resolved Projected DOS on WFN_fi.

In [88]:
# Before that, we need to isolate, for each k, only the nv+nc rows around the gap,
# corresponding to the nv valence and nc conduction bands

# First group df by the first column (one group per k-point), then operate on each group
# For each group, take the rows fermi - nv : fermi (valence) and fermi : fermi + nc (conduction)
# ! `fermi` is a hard-coded positional row offset within each group; ideally it would be located
# ! automatically as the position where df[2] changes from negative to positive.
# The result is indexed by (group key, original row label), so `.loc[key]`
# returns the selected rows of one k-point group.

conduction_df = df.groupby(0).apply(lambda x: x.iloc[fermi:fermi+nc,:])
valence_df = df.groupby(0).apply(lambda x: x.iloc[fermi-nv:fermi,:])
# Reverse the row order of the whole frame so that, within each group, the
# row closest to `fermi` comes first.
reversed_valence_df = valence_df.iloc[::-1,:]
reversed_valence_df

Unnamed: 0_level_0,Unnamed: 1_level_0,0,1,2,3,4,5,6,7,8,9,...,422,423,424,425,426,427,428,429,430,431
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
182.0,65463,182.0,61.31669,-0.20377,0.980,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
182.0,65462,182.0,61.31669,-0.20377,0.980,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
182.0,65461,182.0,61.31669,-0.24970,0.980,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
182.0,65460,182.0,61.31669,-0.24970,0.980,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
182.0,65459,182.0,61.31669,-0.62392,0.952,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1.0,288,1.0,0.00000,-0.89231,0.948,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1.0,287,1.0,0.00000,-1.03809,0.956,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1.0,286,1.0,0.00000,-1.03809,0.956,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1.0,285,1.0,0.00000,-1.05109,0.956,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [89]:
# Sanity check of the hard-coded `fermi`: for the group with key 1, column 2
# should be negative for every valence row and positive for every conduction row.

# The bare lookup below is not the last expression of the cell, so it displays nothing.
valence_df.loc[1]
print(valence_df.loc[1][2].values)           # valence, file order
print(reversed_valence_df.loc[1][2].values)  # valence, reversed order
print(conduction_df.loc[1][2].values)        # conduction, file order

[-1.05109 -1.05109 -1.03809 -1.03809 -0.89231 -0.89231 -0.85469 -0.85469
 -0.78266 -0.78266 -0.71624 -0.71624 -0.64673 -0.64673 -0.61401 -0.61401
 -0.33678 -0.33678 -0.00628 -0.00628]
[-0.00628 -0.00628 -0.33678 -0.33678 -0.61401 -0.61401 -0.64673 -0.64673
 -0.71624 -0.71624 -0.78266 -0.78266 -0.85469 -0.85469 -0.89231 -0.89231
 -1.03809 -1.03809 -1.05109 -1.05109]
[0.90854 0.90854 1.93573 1.93573 1.98048 1.98048 2.03332 2.03332 2.03738
 2.03738 2.56187 2.56187 3.12817 3.12817 3.25896 3.25896 3.2648  3.2648
 3.38972 3.38972]


In [90]:
# The exciton weights index valence bands counting down from the VBM and
# conduction bands counting up from the CBM, so the valence rows must be used
# in reversed order. Preview the reversed valence rows of the group with key 40.

reversed_valence_df.loc[40]
#conduction_df.loc[40]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,422,423,424,425,426,427,428,429,430,431
14343,40.0,12.32602,-0.20297,0.988,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14342,40.0,12.32602,-0.20299,0.988,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14341,40.0,12.32602,-0.26025,0.976,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14340,40.0,12.32602,-0.26027,0.976,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14339,40.0,12.32602,-0.63408,0.952,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14338,40.0,12.32602,-0.63413,0.952,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14337,40.0,12.32602,-0.65693,0.96,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14336,40.0,12.32602,-0.65699,0.96,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14335,40.0,12.32602,-0.69831,0.964,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14334,40.0,12.32602,-0.69836,0.964,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Step 3: Combine the knowledge of K resolved pdos and exciton decomposition into bands

For each k point, find out the decomposition of bands into atomic orbitals

In [91]:
# Use contrib_v_dict[(s, k)] / contrib_c_dict[(s, k)] as per-band weights for
# the projbands rows of the matching k-point, sum over bands to get one row per
# k-point, then sum those rows over all k-points.

# ! Question: should the weights be normalized, i.e. divided by their sum over all v (or c) at each k-point?
# ! Answer: no. Each k-point in the BZ carries a different total weight, as seen in the
# ! plot_eigenvectors.py plots, so per-k normalization would distort the result.

# ! The pdos is not normalized either: the values at (k, n) are used as read, not
# ! divided by the TOTAL column (0.7 - 0.9).
# ! TODO: normalize the pdos by the TOTAL.

s_select = 9

# Collect one single-row frame per k-point and concatenate once at the end;
# concatenating onto a growing frame inside the loop copies all earlier rows
# on every iteration.
valence_rows = []
conduction_rows = []
for k_select in range(len(processed_kgrid_wfn_eigenvector)):
    # Column 3: folded index (0-based; group keys in the projbands table start at 1).
    # Column 4: unfolded index, used as the k key of the contribution dicts.
    k_select_kpdos =  int(processed_kgrid_wfn_eigenvector[k_select][3])
    k_select_eigenvector = int(processed_kgrid_wfn_eigenvector[k_select][4])

    # Scale each band row by its exciton weight, then sum over bands.
    weighted_sum_valence = reversed_valence_df.loc[k_select_kpdos+1].mul(contrib_v_dict[(s_select,k_select_eigenvector)], axis=0).sum()
    # (optional per-k normalization, disabled: / contrib_v_dict[(s_select,k_select)].sum())
    result_df_weighted_sum_valence = pd.DataFrame([weighted_sum_valence])

    weighted_sum_conduction = conduction_df.loc[k_select_kpdos+1].mul(contrib_c_dict[(s_select,k_select_eigenvector)], axis=0).sum()
    # (optional per-k normalization, disabled: / contrib_c_dict[(s_select,k_select)].sum())
    result_df_weighted_sum_conduction = pd.DataFrame([weighted_sum_conduction])

    valence_rows.append(result_df_weighted_sum_valence)
    conduction_rows.append(result_df_weighted_sum_conduction)

# One row per k-point; an empty frame if there were no k-points at all.
result_df_valence = pd.concat(valence_rows, axis=0) if valence_rows else pd.DataFrame()
result_df_conduction = pd.concat(conduction_rows, axis=0) if conduction_rows else pd.DataFrame()

# sum over all k points (down the rows), one total per column
print(result_df_valence.sum(axis=0))
print(result_df_conduction.sum(axis=0))
result_df_conduction

0      66.556395
1      22.765365
2      -0.008179
3       0.986554
4       0.000000
         ...    
427     0.000000
428     0.000000
429     0.000000
430     0.000000
431     0.000000
Length: 432, dtype: float64
0      66.556395
1      22.765365
2       0.915625
3       0.973662
4       0.000000
         ...    
427     0.000000
428     0.000000
429     0.000000
430     0.000000
431     0.000000
Length: 432, dtype: float64


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,422,423,424,425,426,427,428,429,430,431
0,4.033413e-01,0.000000e+00,3.664517e-01,3.936611e-01,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,2.656464e-02,2.544494e-03,1.250560e-02,1.285729e-02,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,3.771641e-04,4.816763e-05,1.298590e-04,1.227037e-04,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,1.907226e-06,2.740207e-07,5.576574e-07,4.576937e-07,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,9.572571e-08,1.467035e-08,2.584678e-08,1.852059e-08,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,1.607122e-06,5.212048e-07,3.574750e-08,2.320035e-08,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,2.403113e-05,7.841852e-06,4.827979e-07,3.495148e-07,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,2.648993e-04,8.699129e-05,4.793385e-06,3.944850e-06,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,4.640383e-03,1.533794e-03,7.595900e-05,7.076546e-05,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [92]:
# Repeat the weighted projection for every exciton state and stack the results:
# row s of each ns_accumulated_df_* frame holds, for state s, the k-summed and
# band-summed weighted projbands columns.
# NOTE: the contribution dicts only hold states whose eigenvalue passed the
# e_min/e_high window in plot_for_k, so looping over all nS states raises
# KeyError if any state fell outside that window.
ns_accumulated_df_valence = pd.DataFrame()
ns_accumulated_df_conduction = pd.DataFrame()
for s_select in range(nS):
    # Running totals over k-points for this state (single-row frames).
    accumulated_df_valence = pd.DataFrame()
    accumulated_df_conduction = pd.DataFrame()
    for k_select in range(len(processed_kgrid_wfn_eigenvector)):
        # Column 3: folded index (projbands group key minus 1); column 4: unfolded index (dict key).
        k_select_kpdos = int(processed_kgrid_wfn_eigenvector[k_select][3])
        k_select_eigenvector = int(processed_kgrid_wfn_eigenvector[k_select][4])
        # Scale each band row by its exciton weight, then sum over bands.
        current_weighted_sum_valence = reversed_valence_df.loc[k_select_kpdos+1].mul(contrib_v_dict[(s_select, k_select_eigenvector)], axis=0).sum()
        current_weighted_sum_conduction = conduction_df.loc[k_select_kpdos+1].mul(contrib_c_dict[(s_select, k_select_eigenvector)], axis=0).sum()
        # First k-point creates the single-row frame; later ones are added in place.
        if accumulated_df_valence.empty:
            accumulated_df_valence = pd.DataFrame([current_weighted_sum_valence])
        else:
            accumulated_df_valence += current_weighted_sum_valence
        if accumulated_df_conduction.empty:
            accumulated_df_conduction = pd.DataFrame([current_weighted_sum_conduction])
        else:
            accumulated_df_conduction += current_weighted_sum_conduction
    ns_accumulated_df_valence = pd.concat([ns_accumulated_df_valence, accumulated_df_valence], axis=0)
    ns_accumulated_df_conduction = pd.concat([ns_accumulated_df_conduction, accumulated_df_conduction], axis=0)

# After the loop this is the running total of the LAST state only.
print(accumulated_df_conduction)
#result_df
ns_accumulated_df_conduction


         0          1         2         3    4    5    6    7    8    9    \
0  66.556395  22.765365  0.915625  0.973662  0.0  0.0  0.0  0.0  0.0  0.0   

   ...  422  423  424  425  426  427  428  429  430  431  
0  ...  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  

[1 rows x 432 columns]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,422,423,424,425,426,427,428,429,430,431
0,62.890322,20.994307,0.947151,0.967908,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,62.887234,20.99331,0.94713,0.967909,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,62.887396,20.993401,0.947119,0.96791,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,62.884399,20.992442,0.947102,0.967912,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,53.479116,17.655584,0.916422,0.973155,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,53.478736,17.655434,0.916421,0.973155,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,53.478013,17.655205,0.91642,0.973155,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,53.477782,17.655102,0.916419,0.973156,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,66.556117,22.765269,0.915625,0.973662,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,66.556395,22.765365,0.915625,0.973662,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [93]:
ns_accumulated_df_valence

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,422,423,424,425,426,427,428,429,430,431
0,62.890322,20.994307,-0.027829,0.983464,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,62.887234,20.99331,-0.027817,0.983466,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,62.887396,20.993401,-0.027811,0.983467,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,62.884399,20.992442,-0.027801,0.983468,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,53.479116,17.655584,-0.008647,0.987649,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,53.478736,17.655434,-0.008646,0.987649,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,53.478013,17.655205,-0.008646,0.987649,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,53.477782,17.655102,-0.008645,0.987649,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,66.556117,22.765269,-0.00818,0.986554,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
0,66.556395,22.765365,-0.008179,0.986554,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [94]:
max(accumulated_df_conduction.iloc[:,4:].sum(axis=0))

0.08500508157251017

In [95]:
# Write the projbands header comments followed by one line per exciton state
# (comma-separated column values) for the valence and the conduction frames.
outputs = (
    ('pdos_sum_valence.dat', ns_accumulated_df_valence),
    ('pdos_sum_conduction.dat', ns_accumulated_df_conduction),
)
for out_name, frame in outputs:
    with open(out_name, 'w') as f:
        f.writelines(comments)
        for i in range(len(frame)):
            row_values = frame.iloc[i, :].values.tolist()
            # str() of a list is "[a, b, ...]"; drop the surrounding brackets.
            f.write(str(row_values).strip('[]') + '\n')

## Step 4: Calculate the projection onto the different parts of the heterostructure

This part is system-specific; for now it is simply hard-coded.

### 4.1 for APD2_Pb

In [28]:
# Orbital column numbers (1-based) and the group they are assigned to:
# columns from 37 to 72, 105 to 168 as the Br-per. contribution
# columns from 1 to 36, 73 to 104 as the Br-non. contribution
# columns from 169 to 232 as the O contribution
# columns from 609 to 624 as the Li contribution
# columns 233 to 608 and 625 to 656 are collected as "other"
# Total is 1 to 656
#
# Orbital column n sits at positional index n+3 of the accumulated frames,
# hence the "+3" in every slice (iloc stop indices are exclusive).

conduction_perovskite = ns_accumulated_df_conduction.iloc[:, 37+3:73+3].sum(axis=1) + ns_accumulated_df_conduction.iloc[:, 105+3:169+3].sum(axis=1)
conduction_non_perovskite = ns_accumulated_df_conduction.iloc[:, 1+3:37+3].sum(axis=1) + ns_accumulated_df_conduction.iloc[:, 73+3:105+3].sum(axis=1)
conduction_O = ns_accumulated_df_conduction.iloc[:, 169+3:233+3].sum(axis=1)
conduction_Li = ns_accumulated_df_conduction.iloc[:, 609+3:625+3].sum(axis=1)
conduction_total = ns_accumulated_df_conduction.iloc[:, 4:660].sum(axis=1)
conduction_other = ns_accumulated_df_conduction.iloc[:, 233+3:609+3].sum(axis=1) + ns_accumulated_df_conduction.iloc[:, 625+3:657+3].sum(axis=1)
# Residual after subtracting every group from the total; the groups above
# tile positional indices 4..659 exactly, so this is expected to be ~0.
conduction_others = conduction_total - conduction_perovskite - conduction_non_perovskite - conduction_O - conduction_Li - conduction_other

In [29]:
conduction_perovskite/ conduction_total

0    0.999970
0    0.999967
0    0.999920
0    0.999917
0    0.999997
0    0.999997
0    0.999997
0    0.999997
0    0.999997
0    0.999998
dtype: float64

In [30]:
# Valence (hole) projections, using the same orbital-column groups as the
# conduction cell: orbital column n sits at positional index n+3, and iloc
# stop indices are exclusive.
valence_perovskite = ns_accumulated_df_valence.iloc[:, 37+3:73+3].sum(axis=1) + ns_accumulated_df_valence.iloc[:, 105+3:169+3].sum(axis=1)
valence_non_perovskite = ns_accumulated_df_valence.iloc[:, 1+3:37+3].sum(axis=1) + ns_accumulated_df_valence.iloc[:, 73+3:105+3].sum(axis=1)
valence_O = ns_accumulated_df_valence.iloc[:, 169+3:233+3].sum(axis=1)
valence_Li = ns_accumulated_df_valence.iloc[:, 609+3:625+3].sum(axis=1)
# Start at index 4 (orbital column 1) so the denominator covers every orbital
# column 1..656, matching conduction_total and the groups summed above.
valence_total = ns_accumulated_df_valence.iloc[:, 1+3:657+3].sum(axis=1)
valence_other = ns_accumulated_df_valence.iloc[:, 233+3:609+3].sum(axis=1) + ns_accumulated_df_valence.iloc[:, 625+3:657+3].sum(axis=1)
# Residual after subtracting every group from the total (expected to be ~0).
valence_others = valence_total - valence_perovskite - valence_non_perovskite - valence_O - valence_Li - valence_other

In [31]:
valence_non_perovskite/valence_total

0    0.393840
0    0.386394
0    0.410400
0    0.402801
0    0.417312
0    0.433765
0    0.407704
0    0.411632
0    0.423943
0    0.415863
dtype: float64

In [101]:
valence_Li/valence_total

0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
0    0.0
dtype: float64

In [32]:
valence_O/valence_total

0    0.403443
0    0.409939
0    0.393395
0    0.400434
0    0.389954
0    0.379879
0    0.395808
0    0.398334
0    0.391333
0    0.391598
dtype: float64

In [33]:
valence_other/valence_total

0    0.202411
0    0.203339
0    0.195973
0    0.196516
0    0.192704
0    0.186337
0    0.196453
0    0.190005
0    0.184704
0    0.192504
dtype: float64

### 4.2 for Hypothetical HOCN - Pb-Pb Intergrowth

In [66]:
# Orbital column numbers (1-based) and the group they are assigned to:
# columns from 1 to 36, 173 to 236 as the Br-per. contribution
# columns from 37 to 172 as the Br-non. contribution
# columns from 237 to 268 as the O contribution
# columns from 269 to 380 as the Li contribution (stored in conduction_other)
# Total is 1 to 380
#
# Orbital column n sits at positional index n+3 of the accumulated frame,
# hence the "+3" in every slice (iloc stop indices are exclusive).

conduction_perovskite = ns_accumulated_df_conduction.iloc[:, 1+3:37+3].sum(axis=1) + ns_accumulated_df_conduction.iloc[:, 173+3:237+3].sum(axis=1)
conduction_non_perovskite = ns_accumulated_df_conduction.iloc[:, 37+3:173+3].sum(axis=1)
conduction_O = ns_accumulated_df_conduction.iloc[:, 237+3:269+3].sum(axis=1)
conduction_other = ns_accumulated_df_conduction.iloc[:, 269+3:381+3].sum(axis=1)
# The total must apply the same +3 offset as the groups; a stop of 381 would
# leave orbital columns 378-380 out of the denominator although
# conduction_other includes them.
conduction_total = ns_accumulated_df_conduction.iloc[:, 1+3:381+3].sum(axis=1)

In [67]:
conduction_non_perovskite/ conduction_total

0    0.999995
0    0.999995
0    0.999995
0    0.999993
0    1.000000
0    1.000000
0    1.000000
0    1.000000
0    0.999999
0    1.000000
dtype: float64

In [65]:
# Valence (hole) projections with the same orbital-column groups as the
# conduction cell of this section: orbital column n sits at positional index
# n+3, and iloc stop indices are exclusive.
valence_perovskite = ns_accumulated_df_valence.iloc[:, 1+3:37+3].sum(axis=1) + ns_accumulated_df_valence.iloc[:, 173+3:237+3].sum(axis=1)
valence_non_perovskite = ns_accumulated_df_valence.iloc[:, 37+3:173+3].sum(axis=1)
valence_O = ns_accumulated_df_valence.iloc[:, 237+3:269+3].sum(axis=1)
valence_other = ns_accumulated_df_valence.iloc[:, 269+3:381+3].sum(axis=1)
# Cover orbital columns 1..380 with the same +3 offset as the groups: start at
# index 4 (not 5, which skips orbital column 1) and stop at 381+3 (not 381,
# which drops orbital columns 378-380).
valence_total = ns_accumulated_df_valence.iloc[:, 1+3:381+3].sum(axis=1)

In [68]:
valence_perovskite/valence_total

0    0.999999
0    0.999999
0    0.999999
0    0.999999
0    1.000000
0    1.000000
0    1.000000
0    1.000000
0    1.000000
0    1.000000
dtype: float64

### 4.3 for Hypothetical HOCN - Pb-Pb Intergrowth

In [96]:
# Orbital column numbers (1-based) and the group they are assigned to:
# columns from 1 to 36, 173 to 236 as the Br-per. contribution
# columns from 37 to 172 as the Br-non. contribution
# columns from 237 to 268 as the O contribution
# columns from 269 to 380 as the Li contribution (stored in *_other)
# Total is 1 to 380
#
# Orbital column n sits at positional index n+3 of the accumulated frames,
# hence the "+3" in every slice (iloc stop indices are exclusive).


conduction_perovskite = ns_accumulated_df_conduction.iloc[:, 1+3:37+3].sum(axis=1) + ns_accumulated_df_conduction.iloc[:, 173+3:237+3].sum(axis=1)
conduction_non_perovskite = ns_accumulated_df_conduction.iloc[:, 37+3:173+3].sum(axis=1)
conduction_O = ns_accumulated_df_conduction.iloc[:, 237+3:269+3].sum(axis=1)
conduction_other = ns_accumulated_df_conduction.iloc[:, 269+3:381+3].sum(axis=1)
# The total applies the same +3 offset as the groups so that it spans every
# orbital column 1..380, including 378-380 that conduction_other contains.
conduction_total = ns_accumulated_df_conduction.iloc[:, 1+3:381+3].sum(axis=1)


valence_perovskite = ns_accumulated_df_valence.iloc[:, 1+3:37+3].sum(axis=1) + ns_accumulated_df_valence.iloc[:, 173+3:237+3].sum(axis=1)
valence_non_perovskite = ns_accumulated_df_valence.iloc[:, 37+3:173+3].sum(axis=1)
valence_O = ns_accumulated_df_valence.iloc[:, 237+3:269+3].sum(axis=1)
valence_other = ns_accumulated_df_valence.iloc[:, 269+3:381+3].sum(axis=1)
# Same span as conduction_total: start at index 4 (orbital column 1) rather
# than 5, and stop at 381+3 rather than 381.
valence_total = ns_accumulated_df_valence.iloc[:, 1+3:381+3].sum(axis=1)

In [97]:
conduction_non_perovskite/ conduction_total

0    0.999986
0    0.999985
0    0.999986
0    0.999986
0    0.999999
0    0.999999
0    0.999999
0    0.999999
0    0.999999
0    0.999999
dtype: float64

In [98]:
valence_perovskite/valence_total

0    0.999999
0    0.999999
0    0.999999
0    0.999999
0    1.000000
0    1.000000
0    1.000000
0    1.000000
0    1.000000
0    1.000000
dtype: float64