In [88]:
!top

[?1h=[H[2J[mtop - 03:47:52 up 44 days, 21:12,  0 users,  load average: 6.91, 4.09, 3.49[m[m[m[m[K
Tasks:[m[m[1m  25 [m[mtotal,[m[m[1m   1 [m[mrunning,[m[m[1m  24 [m[msleeping,[m[m[1m   0 [m[mstopped,[m[m[1m   0 [m[mzombie[m[m[m[m[K
%Cpu(s):[m[m[1m  0.8 [m[mus,[m[m[1m  1.3 [m[msy,[m[m[1m  0.0 [m[mni,[m[m[1m 84.3 [m[mid,[m[m[1m 13.6 [m[mwa,[m[m[1m  0.0 [m[mhi,[m[m[1m  0.0 [m[msi,[m[m[1m  0.0 [m[mst[m[m[m[m[K
MiB Mem :[m[m[1m  64139.4 [m[mtotal,[m[m[1m    450.0 [m[mfree,[m[m[1m  15676.5 [m[mused,[m[m[1m  48012.9 [m[mbuff/cache[m[m[m[m[K
MiB Swap:[m[m[1m   7813.0 [m[mtotal,[m[m[1m   2133.4 [m[mfree,[m[m[1m   5679.6 [m[mused.[m[m[1m  47718.3 [m[mavail Mem [m[m[m[m[K
[K
[7m    PID USER      PR  NI    VIRT    RES    SHR S  %CPU  %MEM     TIME+ COMMAND  [m[m[K
[m1339916 root      20   0   11.8g  84000  19356 D  20.0   0.1  11:35.87 node     [m[m[K
[m1

In [57]:
import torch
from torch.utils.data import DataLoader, Dataset
import torchaudio
from transformers import AutoFeatureExtractor, ASTForAudioClassification
from glob import glob
from collections import defaultdict
from tqdm import tqdm

# Define dataset class
class AudioDataset(Dataset):
    def __init__(self, file_paths, feature_extractor, target_sampling_rate=16000):
        self.file_paths = file_paths
        self.feature_extractor = feature_extractor
        self.target_sampling_rate = target_sampling_rate

    def __len__(self):
        return len(self.file_paths)

    def __getitem__(self, idx):
        file_path = self.file_paths[idx]
        data, sr = torchaudio.load(file_path)
        data = torchaudio.functional.resample(data, orig_freq=sr, new_freq=self.target_sampling_rate)
        data = data.squeeze()
        inputs = self.feature_extractor(data, sampling_rate=self.target_sampling_rate, return_tensors="pt")
        inputs['input_values'] = inputs['input_values'].squeeze(0)  # Remove batch dimension
        return inputs['input_values'], file_path

# Load model and feature extractor
feature_extractor = AutoFeatureExtractor.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")
model = ASTForAudioClassification.from_pretrained("MIT/ast-finetuned-audioset-10-10-0.4593")
model = model.to('cuda:0')
model.eval()

# Parameters
batch_size = 32  # Adjust batch size according to your GPU memory
file_paths = glob("/root/data/test/*.ogg")
dataset = AudioDataset(file_paths, feature_extractor)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

# Inference function
def inference_batch(dataloader, k=5, with_logit=False):
    d = defaultdict(int)
    counts = 0
    tqdm_bar = tqdm(dataloader)

    with open("/root/asset/test_only_speech_list_k4.txt", "w") as tf:
        for batch, paths in tqdm_bar:
            batch = batch.to('cuda:0')
            with torch.no_grad():
                outputs = model(input_values=batch).logits

            for i, logits in enumerate(outputs):
                logits = logits.squeeze()
                predicted_class_ids = torch.argsort(logits)[-k:]
                predicted_labels = [model.config.id2label[_id.item()] for _id in predicted_class_ids]

                for label in predicted_labels:
                    d[label] += 1

                if 'Speech' not in predicted_labels:
                    tf.write(paths[i] + " --> " + str(predicted_labels) + "\n")
                    counts += 1

            tqdm_bar.set_postfix(only_speech=d)

    return d, counts

# Run inference
d, counts = inference_batch(dataloader, k=4, with_logit=True)
print("Finished processing. Total non-speech files:", counts)


100%|██████████| 1563/1563 [18:03<00:00,  1.44it/s, only_speech=defaultdict(<class 'int'>, {'Bang': 48, 'Burst, pop': 346, 'Explosion': 697, 'Speech': 43830, 'Female speech, woman speaking': 6647, 'Inside, small room': 3757, 'Animal': 5752, 'Door': 879, 'Stomach rumble': 202, 'Knock': 1446, 'Slam': 164, 'Coin (dropping)': 342, 'Typing': 1117, 'Computer keyboard': 1302, 'Bouncing': 137, 'Scissors': 195, 'Oink': 934, 'Grunt': 209, 'Helicopter': 591, 'Vehicle': 7604, 'Speech synthesizer': 2622, 'Narration, monologue': 7148, 'Conversation': 4964, 'Male speech, man speaking': 1912, 'Applause': 695, 'Clapping': 710, 'Music': 6398, 'Tick': 1086, 'Tick-tock': 904, 'Liquid': 1534, 'Water': 1989, 'Toilet flush': 935, 'Power tool': 474, 'Wood': 1106, 'Chainsaw': 669, 'Breathing': 287, 'Gasp': 1257, 'Snort': 955, 'Crack': 464, 'Cap gun': 309, 'Sound effect': 1443, 'Clock': 586, 'Cattle, bovinae': 939, 'Moo': 930, 'Livestock, farm animals, working animals': 1130, 'Wind chime': 48, 'Chime': 51, 'Mos

Finished processing. Total non-speech files: 6170





In [8]:
count = 0

with open("/root/asset/non_speech.txt", "r") as tf, open("/root/asset/no_speech_repath.txt", "w") as wf:
    for r in tf:
        line = r.strip().split(' ')
        path = line[0]
        # Assuming the original path is at the end and needs to be replaced with the updated path
        path = "/root/data/test/" + path[-14:]
        line[0] = path
        new_line = ' '.join(line)
        wf.write(new_line + "\n")
        count += 1

print(f"count : {count}")

count : 5082


In [83]:
with open("/root/asset/test_only_speech_list_k4.txt", "r") as tf, \
     open("/root/asset/no_speech_repath.txt", "r") as wf, \
     open("/root/asset/no_speech_3.2k_k4.txt", "w") as f:
    
    new_line = []
    old_line = []
    for r in tf:
        line = r.strip().split(' ')
        path = line[0][-14:-4]
        new_line.append(path)
    
    for r in wf:
        line = r.strip().split(' ')
        path = line[0][-14:-4]
        old_line.append(path)

    ca_1 = [i for i in new_line if i not in old_line]
    ca_2 = [i for i in old_line if i not in new_line]
    ca_3 = [i for i in old_line if i in new_line]
    ca_4 = [i for i in new_line if i in old_line]

    ca = ca_1 + ca_2 + ca_3
    ca.sort()

    f.write('\n'.join(ca))

    print(f"ca_1 : {len(ca_1)}, ca_2 : {len(ca_2)} ca_3 : {len(ca_3)} ca_4 : {len(ca_4)}")


ca_1 : 1088, ca_2 : 0 ca_3 : 5082 ca_4 : 5082


In [67]:
non_speech_list = []

with open("/root/asset/no_speech_3.2k_k4.txt", "r") as f:
    lines = f.readlines()
    for line in lines:
        file_path = line.split(' ')[0].strip('\n')
        #file_name = file_path.split('/')[-1][:-4]
        non_speech_list.append(file_path)

print(non_speech_list[:5])
print(len(non_speech_list))

['TEST_00006', 'TEST_00018', 'TEST_00021', 'TEST_00032', 'TEST_00040']
6170


In [69]:
import pandas as pd

submission_df = pd.read_csv("/root/asset/best/ensemble_best 0.218.csv")
submission_df.loc[submission_df['id'].isin(non_speech_list), ['fake', 'real']] = 0
submission_df

Unnamed: 0,id,fake,real
0,TEST_00000,0.784924,0.885203
1,TEST_00001,0.444133,0.980100
2,TEST_00002,0.795915,0.903753
3,TEST_00003,0.996561,0.485633
4,TEST_00004,0.253478,0.998924
...,...,...,...
49995,TEST_49995,0.186613,0.798281
49996,TEST_49996,0.655186,0.567659
49997,TEST_49997,0.885245,0.697765
49998,TEST_49998,0.456545,0.999993


In [70]:
submission_df.to_csv("./masked_result.csv", index=False)

In [89]:
df_1 = pd.read_csv('./masked_result.csv')
df_2 = pd.read_csv('/root/asset/best/ensemble_best 0.218.csv')

# 두 데이터프레임이 같은 크기인지 확인
if df_1.shape != df_2.shape:
    print("DataFrames have different shapes")
else:
    differences = []

    # 행 단위로 비교
    for index in range(len(df_1)):
        row_1 = df_1.iloc[index]
        row_2 = df_2.iloc[index]
        
        if not row_1.equals(row_2):
            differences.append((index, row_1, row_2))
    
    print(f"Row {len(differences)} is different:")
    # 다른 행 출력
    if differences:
        for diff in differences:
            print("df_1:", diff[1])
            print("df_2:", diff[2])
    else:
        print("No differences found")

Row 1088 is different:
df_1: id      TEST_00018
fake           0.0
real           0.0
Name: 18, dtype: object
df_2: id      TEST_00018
fake      0.051583
real      0.988587
Name: 18, dtype: object
df_1: id      TEST_00021
fake           0.0
real           0.0
Name: 21, dtype: object
df_2: id      TEST_00021
fake      0.788106
real      0.558247
Name: 21, dtype: object
df_1: id      TEST_00032
fake           0.0
real           0.0
Name: 32, dtype: object
df_2: id      TEST_00032
fake       0.29564
real      0.700615
Name: 32, dtype: object
df_1: id      TEST_00042
fake           0.0
real           0.0
Name: 42, dtype: object
df_2: id      TEST_00042
fake      0.914514
real      0.063796
Name: 42, dtype: object
df_1: id      TEST_00075
fake           0.0
real           0.0
Name: 75, dtype: object
df_2: id      TEST_00075
fake      0.235279
real       0.85793
Name: 75, dtype: object
df_1: id      TEST_00129
fake           0.0
real           0.0
Name: 129, dtype: object
df_2: id      TEST_

In [90]:
import pandas as pd

file_ids = []

with open("/root/asset/no_speech_3.2k_k4.txt", "r") as file:
    for line in file:
        #file_path = line.split(' ')[0]
        #file_id = file_path.split('/')[-1].split('.')[0]
        file_ids.append(line.strip("\n"))

# Create a DataFrame
df = pd.DataFrame(file_ids, columns=['id'])

# Save to CSV
csv_path = "/root/asset/nonspeech2.csv"
df.to_csv(csv_path, index=False)

print(f"CSV file saved to {csv_path}")


CSV file saved to /root/asset/nonspeech2.csv


In [6]:
import pandas as pd

ensemble_df = pd.read_csv('/root/asset/ensemble/ensemble_best_num6_0.202.csv')

# 조건에 맞는 행을 필터링하여 값을 조정하는 코드
ensemble_df.loc[(ensemble_df['fake'] >= 0.1) & (ensemble_df['fake'] <= 0.3), 'fake'] -= 0.1
ensemble_df.loc[(ensemble_df['fake'] > 0.7) & (ensemble_df['fake'] <= 0.9), 'fake'] += 0.1

ensemble_df.loc[(ensemble_df['real'] >= 0.1) & (ensemble_df['real'] <= 0.3), 'real'] -= 0.1
ensemble_df.loc[(ensemble_df['real'] > 0.7) & (ensemble_df['real'] <= 0.9), 'real'] += 0.1

ensemble_df.to_csv('/root/asset/ensemble/ensemble_best_num6_0.202_rewrite.csv', index=False)


In [25]:
import pandas as pd

# 파일을 줄 단위로 읽어들임
with open('./Untitled.tsv', 'r') as file:
    lines = file.readlines()

data = []
lines.pop(0)

# 각 줄을 처리하여 데이터 분리
for line in lines:
    # 탭 또는 스페이스 두 번으로 분리
    if '\t' in line:
        dt = line.strip().split('\t')
        if len(dt) == 2:
            id, mask = dt[0], dt[1]
        else:
            id = dt[0]
    elif '  ' in line:
        dt = line.strip().split('  ')
        if len(dt) == 2:
            id, mask = dt[0], dt[1]
        else:
            id = dt[0]
    else:
        continue  # 구분자가 없는 경우 건너뜀

    # 조건에 맞는 'id' 값 추가
    if len(dt) == 1 or mask == 'v':
        data.append(id)

# 새로운 DataFrame 생성
new_df = pd.DataFrame({
    'id': data
})

# CSV 파일로 저장
new_df.to_csv('./new_masking.csv', index=False)
