Skip to content

Commit

Permalink
auto-tuning
Browse files Browse the repository at this point in the history
  • Loading branch information
voxel-tracer committed Nov 15, 2018
1 parent 5117e61 commit 19e10d4
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 18 deletions.
4 changes: 2 additions & 2 deletions Cpp/Cuda/CudaRender.cu
Original file line number Diff line number Diff line change
Expand Up @@ -430,8 +430,8 @@ void deviceEndRendering(f3* colors, unsigned long& rayCount)
for (uint i = 0; i < numRays; i++)
rayCount += i_tmp[i];

cudaFree(f_tmp);
cudaFree(i_tmp);
cudaFreeHost(f_tmp);
cudaFreeHost(i_tmp);
}

void deviceFreeData()
Expand Down
44 changes: 28 additions & 16 deletions Cpp/Windows/TestWin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#include "../Source/Config.h"
#include "../Source/Test.h"

// Forward declaration of RenderFrame; neither its definition nor any call to it is visible in this view.
// NOTE(review): this listing is a rendered diff and this line looks like one of the removed lines - confirm.
static size_t RenderFrame();

// Float buffer shared at file scope; main() allocates it with
// kBackbufferWidth * kBackbufferHeight * 4 elements and zero-fills it.
static float* g_Backbuffer;

void write_image(const char* output_file) {
Expand Down Expand Up @@ -42,30 +40,44 @@ float render(const unsigned int numFrames, const unsigned int samplesPerPixel, c

const float duration = (float)(clock() - start_time) / CLOCKS_PER_SEC;
const float throughput = rayCounter / duration * 1.0e-6f;
printf("total %lu rays in %.2fs (%.1fMrays/s)\n", rayCounter, duration, throughput);
//printf(" total %lu rays in %.2fs (%.1fMrays/s)\n", rayCounter, duration, throughput);
return throughput;
}

// Benchmark driver. Allocates and zeroes the backbuffer, then calls render() for
// combinations of frame count, samples per pixel and thread count, repeating each
// configuration 10 times and printing a median of the returned throughputs.
//
// argc/argv are not used in the visible body. Returns 0.
//
// NOTE(review): this listing comes from a rendered commit diff without +/- markers.
// Removed and added lines appear to be interleaved (two declarations of `v`, two
// median computations, write_image both live and commented out). Confirm against
// the actual file before relying on the control flow exactly as listed here.
int main(int argc, char** argv) {
// Parameter grids swept by the nested loops below.
unsigned int numFrames[3] = { 100, 200, 400 };
unsigned int numSamples[6] = { 1, 2, 4, 8, 16, 32 };
// NOTE(review): 182 is the only entry that is not a multiple of 32; 192 may have been intended - confirm.
unsigned int numThreads[8] = { 32, 64, 96, 128, 160, 182, 224, 256 };

// 4 floats per pixel, zero-initialised.
// NOTE(review): no matching delete[] for g_Backbuffer is visible in this function.
g_Backbuffer = new float[kBackbufferWidth * kBackbufferHeight * 4];
memset(g_Backbuffer, 0, kBackbufferWidth * kBackbufferHeight * 4 * sizeof(g_Backbuffer[0]));

// Main rendering loop
// Function-scope throughput samples; shadowed by the inner `v` declared per configuration below.
std::vector<float> v;

// NOTE(review): this fixed 10-iteration loop with render(100, 4, 128) looks like the pre-change
// benchmark left in by the diff rendering; as listed it wraps the whole parameter sweep.
for (int i = 0; i < 10; i++)
for (int frame = 0; frame < 3; frame++)
{
float throughput = render(100, 4, 128);
fflush(stdout);

v.push_back(throughput);
for (int sample = 0; sample < 6; sample++)
{
for (int threads = 0; threads < 8; threads++)
{
// NOTE(review): the three arguments are unsigned int; %u would match the type, %d is used here.
printf("%d frames, %d samples, %d threads\n", numFrames[frame], numSamples[sample], numThreads[threads]);

// Fresh sample set for this (frames, samples, threads) configuration.
std::vector<float> v;

// Run the same configuration 10 times and collect each returned throughput.
for (int i = 0; i < 10; i++)
{
float throughput = render(numFrames[frame], numSamples[sample], numThreads[threads]);
fflush(stdout);

v.push_back(throughput);
}

// Sort ascending so the middle elements can be read by index.
std::sort(v.begin(), v.end());
// NOTE(review): v holds 10 values here (indices 0..9), whose middle pair is v[4] and v[5];
// averaging v[5] and v[6] reports a value above the true median - confirm intent.
float median = (v[5] + v[6]) / 2;
printf(" median throughput %.1fM rays/s\n", median);
}
}
}

// Median over the function-scope `v`.
// NOTE(review): likely pre-change lines left in by the diff rendering; the same
// v[5]/v[6] indexing concern applies here.
std::sort(v.begin(), v.end());
float median = (v[5] + v[6]) / 2;
printf("median throughput %.1fM rays/s\n", median);

// NOTE(review): the call appears both live and commented out - the live call is probably the removed diff line.
write_image("image.png");
//write_image("image.png");

return 0;
}

0 comments on commit 19e10d4

Please sign in to comment.