
CSE 628 Term Project Report\
GPU and Multithreaded Comparison of Mean Face using img_align_celeba Dataset\
Zack Owens\
7/19/2023


In [None]:
# Move into the dataset directory that sits next to the current working directory.
# `joinpath` inserts the platform's own separator, so this also works off Windows
# (it still produces "..\\img_align_celeba" on Windows).
cd(joinpath("..", "img_align_celeba"))
pwd()

In [None]:
using Base.Threads

# Number of threads available to this Julia session.
num_threads = nthreads()

In [None]:
# Read the attribute table into a DataFrame. The path is relative, so this must run
# from the directory that contains `list_attr_celeba_2.csv`.
using CSV
using DataFrames


df = CSV.read("./list_attr_celeba_2.csv",DataFrame)

In [None]:
# Keep only the rows whose `Male` attribute equals 1.
# NOTE(review): the females cell selects `Male == 0`, so this assumes a 0/1 coding of the
# column — confirm against the CSV.
males = filter(:Male => ==(1), df)
males.id

In [None]:
# Keep only the rows whose `Male` attribute equals 0.
# NOTE(review): the males cell selects `Male == 1`, so this assumes a 0/1 coding of the
# column — confirm against the CSV.
females = filter(:Male => ==(0), df)
females.id

## 1.2

Display the last 3 faces of men and the last 3 faces of women.

In [None]:
# Show the last 3 male and the last 3 female faces in one mosaic.
using Images, FileIO, Colors

# `last(v, 3)` returns at most 3 entries, so a list with fewer than 3 rows does not raise
# a BoundsError the way `v[length(v)-2:length(v)]` would.
last_3_males = last(males.id, 3)
last_3_females = last(females.id, 3)

# Each id is passed to `load` unchanged, so it must resolve to an image file relative to
# the current working directory.
@time begin
    vector_N = Matrix{RGB{N0f8}}[load(f) for f in last_3_males]
end


@time begin
    vector_N_2 = Matrix{RGB{N0f8}}[load(f) for f in last_3_females]
end


# Male faces first, then female faces, arranged in two columns.
mosaicview([vector_N; vector_N_2]; fillvalue=0.5, npad=10, ncol=2)

In [None]:
# Query the machine's hardware layout through Hwloc so it is recorded next to the timings.
using Hwloc
using Base.Threads
topology()

In [None]:
# `Sys.total_memory()` is in bytes and one GiB is 2^30 bytes. The previous `/ 2^20 / 1000`
# mixed binary and decimal units, so the printed figure was neither GB nor GiB.
print(round(Sys.total_memory() / 2^30; digits=2), " GiB of RAM")

In [None]:


"""
    mean_RGB(args...)

Return the arithmetic mean of the given colors as an `RGB{N0f8}`.

Every argument is scaled by `1 / length(args)` and added, in order, to an `RGB{Float64}`
accumulator that starts at black; the total is converted to `RGB{N0f8}` once at the end.
The notebook broadcasts this over several equally sized images (`mean_RGB.(images...)`),
so each call receives the pixels found at one position.
"""
function mean_RGB(args...)
    weight = inv(length(args))
    total = RGB{Float64}(0, 0, 0)
    for px in args
        total = total + weight * px
    end
    return RGB{N0f8}(total)
end


In [None]:
# Read in all the images for men at once.
# `joinpath` inserts the platform's own separator, so this also works off Windows
# (it still produces "..\\img_align_celeba" on Windows).
cd(joinpath("..", "img_align_celeba"))
pwd()


# The typed comprehension allocates the result once at its final length instead of
# growing it with repeated `push!` calls.
@time begin
    vector_N = Matrix{RGB{N0f8}}[load(f) for f in males.id]
end






In [None]:
using Folds, Referenceables, Base.Threads, CUDA
"""
    calculate_batch_mean(images)

Return the per-pixel mean of `images`, obtained by broadcasting `mean_RGB` over all of the
matrices at once.
"""
calculate_batch_mean(images::Vector{Matrix{RGB{N0f8}}}) = mean_RGB.(images...)

"""
    batch_images(images, batch_size)

Split `images` into consecutive batches of `batch_size` elements and return them as a
vector of vectors (each batch is a copy of the corresponding slice).

The last batch holds the remaining elements and may therefore be shorter than
`batch_size`. Callers that average per-batch results should weight each batch by its
length. An empty `images` yields an empty vector of batches.

# Throws
- `ArgumentError` if `batch_size` is not positive.
"""
function batch_images(images::AbstractVector, batch_size::Integer)
    batch_size > 0 ||
        throw(ArgumentError("batch_size must be positive, got $batch_size"))

    stop = lastindex(images)
    # Stepping the start index by `batch_size` covers every element; the previous
    # `div(num_images, batch_size)` count silently dropped the trailing partial batch.
    return [images[lo:min(lo + batch_size - 1, stop)]
            for lo in firstindex(images):batch_size:stop]
end


# Weighted per-pixel sum of equally sized images: sum over k of weights[k] * images[k].
# Accumulates in RGB{Float64} starting from black and converts to RGB{N0f8} once at the
# end, which is the same arithmetic `mean_RGB` performs per pixel, but without splatting
# the whole collection into a single call.
function _weighted_mean_image(images, weights)
    acc = fill(RGB{Float64}(0, 0, 0), size(first(images)))
    for (img, w) in zip(images, weights)
        acc .+= w .* img
    end
    return RGB{N0f8}.(acc)
end

# Plain mean of one batch: every image is weighted 1 / length(batch).
function _batch_mean(batch)
    return _weighted_mean_image(batch, Iterators.repeated(1.0 / length(batch)))
end

# Split `images` with `batch_images` and reject the case where nothing is left to average.
function _checked_batches(images, batch_size)
    batches = batch_images(images, batch_size)
    isempty(batches) && throw(ArgumentError(
        "batching $(length(images)) image(s) with batch_size = $batch_size produced no batches",
    ))
    return batches
end

# Merge the per-batch means, weighting each one by the number of images in its batch so a
# shorter batch does not count as much as a full one.
function _combine_batch_means(mean_images, batches)
    total = sum(length, batches)
    return _weighted_mean_image(mean_images, (length(b) / total for b in batches))
end

"""
    calculate_mean_parallel(images, batch_size)

Return the per-pixel mean of `images`, computed batch by batch with one `@threads` loop
iteration per batch.

`images` is split by `batch_images`; each batch is averaged independently and written to
its own slot of a preallocated vector. The batch means are then combined, weighted by
batch length. Each batch mean is rounded to `RGB{N0f8}` before the final combination.

# Throws
- `ArgumentError` if batching yields no batches (for example, `images` is empty).
"""
function calculate_mean_parallel(images::Vector{Matrix{RGB{N0f8}}}, batch_size::Int)
    batches = _checked_batches(images, batch_size)
    mean_images = Vector{Matrix{RGB{N0f8}}}(undef, length(batches))

    # Iterations are independent: each one writes only its own slot.
    @threads for i in eachindex(batches)
        mean_images[i] = _batch_mean(batches[i])
    end

    return _combine_batch_means(mean_images, batches)
end

"""
    calculate_mean_parallel_s(images, batch_size)

Same computation as `calculate_mean_parallel`, but each batch is submitted as its own task
with `Threads.@spawn`, and `@sync` waits for all of them before the results are combined.

# Throws
- `ArgumentError` if batching yields no batches (for example, `images` is empty).
"""
function calculate_mean_parallel_s(images::Vector{Matrix{RGB{N0f8}}}, batch_size::Int)
    batches = _checked_batches(images, batch_size)
    mean_images = Vector{Matrix{RGB{N0f8}}}(undef, length(batches))

    # One task per batch; each task writes only its own slot.
    @sync for i in eachindex(batches)
        Threads.@spawn mean_images[i] = _batch_mean(batches[i])
    end

    return _combine_batch_means(mean_images, batches)
end




In [None]:
# NOTE: element-type checks made before moving the images to the GPU.
isbitstype(RGB{N0f8}) # true
isbitstype(Array{RGB{N0f8}}) # false, but an Array of RGB{N0f8} can still be converted to a CuArray

# Game plan: load the images into one Array instead of a Vector of matrices, then convert that to a CuArray.


In [None]:
# NOTE(review): `cuda_mean` is not defined anywhere in the visible part of this file; unless
# it comes from a cell that is not shown, this call fails with an UndefVarError — confirm.
@time cuda_mean(vector_N[1:100])

# Linear

In [None]:
# Single-threaded baseline: broadcast `mean_RGB` over the first 100 images.
# The slice `vector_N[1:100]` is copied inside the timed expression, and a first call also
# includes compilation, so run the cell twice before quoting a time.
@time vector_mean = mean_RGB.(vector_N[1:100]...)

In [None]:
# Same baseline with the first 500 images.
@time vector_mean = mean_RGB.(vector_N[1:500]...)

In [None]:
# Same baseline with the first 1000 images. `mean_RGB` averages its arguments, so the
# images have to be splatted and the call broadcast, exactly like the 100- and 500-image
# runs; passing the Vector itself made it the single "pixel" and raised a MethodError.
@time vector_mean = mean_RGB.(vector_N[1:1_000]...)

# Parallel using @threads

In [None]:
# `@threads` version: first 100 images in batches of 10.
# The slice copy and, on a first call, compilation are both inside the timed expression.
@time vector_mean = calculate_mean_parallel(vector_N[1:100],10)

In [None]:
# First 500 images in batches of 50.
@time vector_mean = calculate_mean_parallel(vector_N[1:500],50)

In [None]:
# First 1000 images in batches of 100.
@time vector_mean = calculate_mean_parallel(vector_N[1:1_000],100)

In [None]:
# First 10000 images in batches of 500.
@time vector_mean = calculate_mean_parallel(vector_N[1:10_000],500)

# Parallel using Spawn

In [None]:
# `Threads.@spawn` version: first 100 images in batches of 10.
# The slice copy and, on a first call, compilation are both inside the timed expression.
@time vector_mean = calculate_mean_parallel_s(vector_N[1:100],10)

In [None]:
# First 500 images in batches of 50.
@time vector_mean = calculate_mean_parallel_s(vector_N[1:500],50)

In [None]:
# First 10000 images in batches of 500.
@time vector_mean = calculate_mean_parallel_s(vector_N[1:10_000],500)

# Cuda arrays

In [None]:
# Number of male images listed in the attribute table.
length(males.id)

In [None]:
# presumably hands cached GPU memory back before the large allocation below — confirm
# against the CUDA.jl documentation
CUDA.reclaim()

In [None]:
# Report GPU memory usage before allocating the image array.
CUDA.memory_status()

In [None]:
using CUDA
num_images = 40_000
@time begin
    # The GPU does not have enough memory for all 84_000+ male images: Windows uses about
    # half of the card, so less than 4 GB is actually accessible. Only the first
    # `num_images` images are therefore loaded.
    # Requires num_images x 218 x 178 x 3 bytes of memory.
    array_N = CuArray{RGB{N0f8}}(undef, 218, 178, num_images)

    # Iterating the first `num_images` ids already stops after `num_images` entries, so
    # the former `if i <= length(...) ... else break` guard was always true and only
    # re-sliced the id column on every iteration. The view avoids copying the ids at all.
    for (i, f) in enumerate(view(males.id, 1:num_images))
        array_N[:, :, i] = load(f)
    end
end


#@time begin
#    vector_N = Vector{Matrix{RGB{N0f8}}}([])
#    for (i, f) in enumerate(males.id)
#            push!(vector_N, load(f))
#    end
#end


In [None]:
"""
    CudaMeanFace(array, num_images)

Return the mean face of the image stack `array`, computed on the GPU.

The stack is summed along its third dimension and the sum is divided in place by
`num_images`. The reduced dimension is kept, so the result has a third dimension of
size 1.

# Arguments
- `array`: stack of images indexed as `[row, column, image]`.
- `num_images`: divisor applied to the sum; the caller passes the number of images in
  the stack.
"""
function CudaMeanFace(array::CuArray{RGB{N0f8}}, num_images::Int)
    # No output buffer is preallocated: `sum` allocates its own result, so the earlier
    # hard-coded 218x178 buffer was overwritten before it was ever used.
    mean_face = sum(array; dims=3)
    mean_face ./= num_images
    return mean_face
end

In [None]:
@time value = CudaMeanFace(array_N, num_images)
# `value` keeps a trailing dimension of size 1, so two colons are enough to select the
# whole image; `Array` then copies it back to host memory.
single_image = Array(value[:, :])


# Read in all the images for women at once.
@time begin
    vector_N_2 = Matrix{RGB{N0f8}}[load(f) for f in females.id]
end
