# License
    IPython notebook for simulating the 1D heat equation with CUDA
    Copyright (C) 2015, 2018 Andre.Brodtkorb@ifi.uio.no

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.

In [1]:
%matplotlib inline 

#Import packages we need
import numpy as np
from matplotlib import animation, rc, cm
from matplotlib import pyplot as plt

import pycuda.compiler as cuda_compiler
import pycuda.driver as cuda_driver
from pycuda.gpuarray import GPUArray

import IPythonMagic
from Timer import Timer

import pytest
from ipytest import run_pytest, clean_tests

In [2]:
#Custom line magics, presumably registered by the IPythonMagic import above (TODO confirm)
#Set up logging for this notebook
%setup_logging
#Create a CUDA context and register its handler in the user workspace;
#the argument looks like the variable name it is stored under (verify against IPythonMagic)
%cuda_context_handler context

Python version 3.6.6 | packaged by conda-forge | (default, Jul 26 2018, 09:53:17) 
[GCC 4.8.2 20140120 (Red Hat 4.8.2-15)]
Registering context in user workspace
Creating context
PyCUDA version 2018.1.1
CUDA version (8, 0, 0)
Driver version 9010
Using 'Quadro K2000' GPU
 => compute capability: (3, 0)
 => memory: 1876 / 1999 MB available
Created context handle <48859888>
Using CUDA cache dir /home/jobic/test/cuda/milan2018/MilanoGPU2018/notebooks/cuda_cache


# Heat equation in 1D
The heat equation can be written
$$
\begin{align}
\frac{\partial u}{\partial t} &= \kappa \nabla^2 u\\
&= \kappa \frac{\partial^2 u}{\partial x^2}
\end{align}
$$
where $u$ is the temperature, and $\kappa$ is the material specific heat conduction constant. 

By approximating the temporal derivative with a forward difference (explicit Euler), and the spatial derivative with a central difference, we get
$$
\frac{1}{\Delta t} (u_i^{n+1} - u_i^{n}) = \kappa \frac{1}{\Delta x^2}(u_{i-1}^n - 2u_i^n + u_{i+1}^n)
$$
and gathering $u^{n+1}$ on the left hand side and $u^n$ on the right, we write
$$
u^{n+1}_i = u_i^n + \frac{\kappa\Delta t}{\Delta x^2}(u_{i-1}^n - 2u_i^n + u_{i+1}^n)
$$
This discretization is unstable if the following CFL condition is not met
$$
\frac{1}{2} \gt \frac{\kappa\Delta t}{\Delta x^2}
$$
or 
$$
\Delta t \lt \frac{\Delta x^2}{2\kappa}
$$

In [3]:
class HeatEquation(object):
    """
    Explicit finite-difference solver for the 1D heat equation on the CPU.

    Each call to step() applies
        u1[i] = u0[i] + kappa*dt/dx^2 * (u0[i-1] - 2*u0[i] + u0[i+1])
    to the interior cells. The first and last cells are ghost cells that are
    set to the value of their interior neighbour after every step.

    The scheme is only stable when dt < dx^2 / (2*kappa).
    """
    def __init__(self, u0, kappa, dx, dt):
        """
        Parameters:
            u0:    1D array with the initial state, including one ghost cell at each end
            kappa: heat conduction constant
            dx:    cell size
            dt:    time step

        Raises:
            ValueError: if u0 is not one-dimensional or has fewer than 3 cells
        """
        #Work on a private copy: the two buffers are swapped in step(), so
        #keeping a reference would overwrite the caller's array on later steps
        u0 = np.array(u0, copy=True)
        if u0.ndim != 1 or u0.shape[0] < 3:
            raise ValueError("u0 must be a 1D array with at least 3 cells (two ghost cells and one internal cell)")

        self.u0 = u0
        self.u1 = np.empty_like(u0)
        self.kappa = kappa
        self.dx = dx
        self.dt = dt
        #Number of internal cells (everything except the two ghost cells)
        self.nx = u0.shape[0]-2


    def step(self):
        """Advance the solution by one time step of length dt."""
        u0, u1 = self.u0, self.u1
        r = self.kappa*self.dt/(self.dx*self.dx)

        #Internal cells, all updated at once with shifted views of u0
        u1[1:-1] = u0[1:-1] + r * (u0[:-2] - 2.0*u0[1:-1] + u0[2:])

        #Boundary conditions: ghost cells copy their internal neighbour
        u1[0] = u1[1]
        u1[self.nx+1] = u1[self.nx]

        #The freshly computed state becomes the input of the next step
        self.u0, self.u1 = u1, u0

    def download(self):
        """
        Return the current state, including the ghost cells.

        A copy is returned so that the result is not overwritten by later
        calls to step().
        """
        return self.u0.copy()

In [4]:
#Create test input data
kappa = 1.0
nx = 100    #Total number of cells in u0 (HeatEquation treats the first and last as ghost cells)
dx = 1.0
#Use 40% of the largest stable time step, dx^2 / (2*kappa)
dt = 0.4 * dx**2 / (2.0*kappa)

#Initial condition: value 10 within a distance of 10*dx from the domain centre, zero elsewhere
cell_offsets = (np.arange(nx) - nx/2.0) * dx
u0 = np.where(np.abs(cell_offsets) < 10*dx, 10.0, 0.0).astype(np.float32)


with Timer("Initialization") as t:
    simulator = HeatEquation(u0, kappa, dx, dt)

Initialization: 0.029802 ms


In [5]:
#Create a single-axes figure and draw the initial state;
#the resulting line object is updated in place for every animation frame
fig, ax = plt.subplots(figsize=(12, 8))

x = np.linspace(0, nx*dx, nx)
line = ax.plot(x, u0)[0]

def animate(i):
    """
    Frame callback for FuncAnimation.

    Frame 0 shows the current state of the global `simulator`; every later
    frame first advances it by ten time steps. The global `line` is then
    updated with the downloaded state.
    """
    #Progress indicator: one dot per rendered frame
    print(".", end='', flush=True)

    steps_this_frame = 10 if i > 0 else 0
    for _ in range(steps_this_frame):
        simulator.step()

    line.set_ydata(simulator.download())
    
#Display animations as HTML5 video in the notebook
rc("animation", html="html5")
anim = animation.FuncAnimation(fig, animate, frames=range(50), interval=100)
#Close the figure so that only the animation is displayed, not a static plot as well
plt.close(fig)
anim

Animation.save using <class 'matplotlib.animation.FFMpegWriter'>
frame size in pixels is 864 x 576
MovieWriter.run: running command: ['ffmpeg', '-f', 'rawvideo', '-vcodec', 'rawvideo', '-s', '864x576', '-pix_fmt', 'rgba', '-r', '10.0', '-i', 'pipe:', '-vcodec', 'h264', '-pix_fmt', 'yuv420p', '-y', '/tmp/tmpu0w7u6hd/temp.m4v']


..

update_title_pos
findfont: Matching :family=sans-serif:style=normal:variant=normal:weight=normal:stretch=normal:size=10.0 to DejaVu Sans ('/home/jobic/anaconda3/envs/gpudev/lib/python3.6/site-packages/matplotlib/mpl-data/fonts/ttf/DejaVuSans.ttf') with score of 0.050000.
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos


.

update_title_pos
MovieWriter.grab_frame: Grabbing frame.
update_title_pos
MovieWriter -- Command stdout:
b''
MovieWriter -- Command stderr:
b"ffmpeg version 4.0.2-static https://johnvansickle.com/ffmpeg/  Copyright (c) 2000-2018 the FFmpeg developers\n  built with gcc 6.3.0 (Debian 6.3.0-18+deb9u1) 20170516\n  configuration: --enable-gpl --enable-version3 --enable-static --disable-debug --disable-ffplay --disable-indev=sndio --disable-outdev=sndio --cc=gcc-6 --enable-fontconfig --enable-frei0r --enable-gnutls --enable-gray --enable-libaom --enable-libfribidi --enable-libass --enable-libvmaf --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-librubberband --enable-libsoxr --enable-libspeex --enable-libvorbis --enable-libopus --enable-libtheora --enable-libvidstab --enable-libvo-amrwbenc --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg\n  libavuti

In [6]:
class HeatEquationGPU(object):
    """
    Explicit finite-difference solver for the 1D heat equation on the GPU, using PyCUDA.

    Each call to step() launches a kernel that applies
        u1[i] = u0[i] + kappa*dt/dx^2 * (u0[i-1] - 2*u0[i] + u0[i+1])
    to the internal cells. The first and last cells are ghost cells that
    mirror their internal neighbour.
    """
    def __init__(self):
        #All set-up is deferred to initialize()
        pass

    def initialize(self, u0, kappa, dx, dt, block_width=128):
        """
        Upload the initial state and compile the CUDA kernel.

        Parameters:
            u0:          1D array with the initial state, including one ghost cell at each end
            kappa:       heat conduction constant
            dx:          cell size
            dt:          time step
            block_width: number of threads per CUDA block

        Raises:
            ValueError: if u0 is not one-dimensional, has fewer than 3 cells,
                        or block_width is not positive
        """
        #The kernel operates on float*, so make sure the data really is contiguous float32
        u0 = np.ascontiguousarray(u0, dtype=np.float32)
        if u0.ndim != 1 or u0.shape[0] < 3:
            raise ValueError("u0 must be a 1D array with at least 3 cells (two ghost cells and one internal cell)")
        block_width = int(block_width)
        if block_width < 1:
            raise ValueError("block_width must be a positive integer")

        #Scalars are stored with the exact types of the kernel arguments
        self.kappa = np.float32(kappa)
        self.dx = np.float32(dx)
        self.dt = np.float32(dt)
        self.nx = np.int32(u0.shape[0]-2)

        #One thread per internal cell, rounded up to a whole number of blocks
        self.block_size = (block_width, 1, 1)
        num_blocks = (int(self.nx) + block_width - 1) // block_width
        self.grid_size = (num_blocks, 1, 1)
        self.stream = cuda_driver.Stream()

        self.u1_g = GPUArray(u0.shape, u0.dtype)
        self.u0_g = GPUArray(u0.shape, u0.dtype)

        self.u0_g.set_async(u0, stream=self.stream)


        #Only the first part is passed through format(), so the braces of the
        #kernel body below do not need to be escaped
        cuda_kernel = \
        """
        #define BLOCK_WIDTH {:d}
        """.format(block_width) \
        + \
        """
        __global__ void heatEqn(float* u1, const float* u0, float kappa, float dx, float dt, int nx) {
            //Plus one to skip the "ghost cells"
            int i = blockIdx.x*blockDim.x + threadIdx.x + 1;
            int tx = threadIdx.x + 1;

            //First read into shared memory, including the local ghost cells / apron
            //Reads are clamped to the last cell so that the final block stays within the array
            __shared__ float u0_shared[BLOCK_WIDTH+2];
            for (int k=threadIdx.x; k<BLOCK_WIDTH+2; k += blockDim.x) {
                int s = min(blockIdx.x*blockDim.x + k, nx+1);
                u0_shared[k] = u0[s];
            }
            __syncthreads();

            //Then fix the boundary conditions
            //[0 | 1 2 3 4 | 5]
            //Two separate tests: with a single internal cell the same thread owns both boundaries
            if (i == 1) {
                u0_shared[tx-1] = u0_shared[tx];
            }
            if (i == nx) {
                u0_shared[tx+1] = u0_shared[tx];
            }
            __syncthreads();

            if (i >= 1 && i <= nx) {
                const float u_new = u0_shared[tx] + kappa*dt/(dx*dx) * (u0_shared[tx-1] - 2.0f*u0_shared[tx] + u0_shared[tx+1]);
                u1[i] = u_new;

                //Also write the ghost cells of the output, so that the downloaded
                //array never contains uninitialized values at its two ends
                if (i == 1) {
                    u1[0] = u_new;
                }
                if (i == nx) {
                    u1[nx+1] = u_new;
                }
            }
        }
        """
        self.module = cuda_compiler.SourceModule(cuda_kernel, \
                                            options=['--use_fast_math'])
        self.heat_eqn_kernel = self.module.get_function("heatEqn")
        #Argument layout: two pointers, three floats and one int
        self.heat_eqn_kernel.prepare("PPfffi")

        #The converted host array is local to this function, so wait for the upload to finish
        self.stream.synchronize()



    def step(self):
        """Advance the solution by one time step of length dt (asynchronous kernel launch)."""
        self.heat_eqn_kernel.prepared_async_call(self.grid_size, self.block_size, self.stream, \
                                                    self.u1_g.gpudata, self.u0_g.gpudata, \
                                                    self.kappa, self.dx, self.dt, self.nx)
        #The freshly computed state becomes the input of the next step
        self.u0_g, self.u1_g = self.u1_g, self.u0_g


    def download(self):
        """Return the current state, including the ghost cells, as a new numpy array."""
        #Make sure all queued kernels have finished before copying
        self.stream.synchronize()
        return self.u0_g.get()
        
        
        
        
#Create test input data
kappa = 1.0
nx = 100    #Total number of cells in u0 (HeatEquationGPU treats the first and last as ghost cells)
dx = 1.0
#Use 40% of the largest stable time step, dx^2 / (2*kappa)
dt = 0.4 * dx**2 / (2.0*kappa)

#Initial condition: value 10 within a distance of 10*dx from the domain centre, zero elsewhere
cell_offsets = (np.arange(nx) - nx/2.0) * dx
u0 = np.where(np.abs(cell_offsets) < 10*dx, 10.0, 0.0).astype(np.float32)

with Timer("Initialization") as t:
    simulator = HeatEquationGPU()
    simulator.initialize(u0, kappa, dx, dt)

Initialization: 696.822166 ms


In [10]:
#Create a single-axes figure and draw the initial state;
#the resulting line object is updated in place for every animation frame
fig, ax = plt.subplots(figsize=(12, 8))

x = np.linspace(0, nx*dx, nx)
line = ax.plot(x, u0)[0]

def animate(i):
    """
    Frame callback for FuncAnimation.

    Frame 0 shows the current state of the global `simulator`; every later
    frame first advances it by ten time steps. The global `line` is updated
    with the downloaded state, and the sum over the internal cells is printed.
    """
    #Progress indicator: one dot per rendered frame
    print(".", end='', flush=True)

    steps_this_frame = 10 if i > 0 else 0
    for _ in range(steps_this_frame):
        simulator.step()

    state = simulator.download()
    line.set_ydata(state)

    #Sum over the internal cells (ghost cells excluded)
    interior_sum = np.sum(state[1:-1])
    print(interior_sum)
    
    

#Display animations as HTML5 video in the notebook
rc("animation", html="html5")
anim = animation.FuncAnimation(fig, animate, frames=range(50), interval=100)
#Close the figure so that only the animation is displayed, not a static plot as well
plt.close(fig)
anim

Animation.save using <class 'matplotlib.animation.FFMpegWriter'>
frame size in pixels is 864 x 576
MovieWriter.run: running command: ['ffmpeg', '-f', 'rawvideo', '-vcodec', 'rawvideo', '-s', '864x576', '-pix_fmt', 'rgba', '-r', '10.0', '-i', 'pipe:', '-vcodec', 'h264', '-pix_fmt', 'yuv420p', '-y', 'C:\\Users\\anbro\\AppData\\Local\\Temp\\tmpkkbh8uyo.m4v']


.190.00002
.

MovieWriter.grab_frame: Grabbing frame.


190.00002
.

MovieWriter.grab_frame: Grabbing frame.


190.00002
.

MovieWriter.grab_frame: Grabbing frame.


190.00003
.

MovieWriter.grab_frame: Grabbing frame.


190.0
.

MovieWriter.grab_frame: Grabbing frame.


190.00002
.190.00002


MovieWriter.grab_frame: Grabbing frame.


.190.00002


MovieWriter.grab_frame: Grabbing frame.


.190.00002


MovieWriter.grab_frame: Grabbing frame.


.

MovieWriter.grab_frame: Grabbing frame.


190.00002
.

MovieWriter.grab_frame: Grabbing frame.


190.00002
.190.00003


MovieWriter.grab_frame: Grabbing frame.


.

MovieWriter.grab_frame: Grabbing frame.


190.00002
.190.00003


MovieWriter.grab_frame: Grabbing frame.


.

MovieWriter.grab_frame: Grabbing frame.


190.00002
.190.00003


MovieWriter.grab_frame: Grabbing frame.


.190.00002


MovieWriter.grab_frame: Grabbing frame.


.190.00002


MovieWriter.grab_frame: Grabbing frame.


.

MovieWriter.grab_frame: Grabbing frame.


190.00002
.190.0


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.


.190.0


MovieWriter.grab_frame: Grabbing frame.


.190.00002


MovieWriter.grab_frame: Grabbing frame.


.190.00002


MovieWriter.grab_frame: Grabbing frame.


.

MovieWriter.grab_frame: Grabbing frame.


190.00003
.190.00002


MovieWriter.grab_frame: Grabbing frame.


.190.00005


MovieWriter.grab_frame: Grabbing frame.


.190.00002


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.


.

MovieWriter.grab_frame: Grabbing frame.


190.00003
.190.00003


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.


.

MovieWriter.grab_frame: Grabbing frame.


190.00003
.190.00003


MovieWriter.grab_frame: Grabbing frame.


.

MovieWriter.grab_frame: Grabbing frame.


190.00003
.190.00003


MovieWriter.grab_frame: Grabbing frame.


.190.00002


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.


.190.00005


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.


.190.00002


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.


.

MovieWriter.grab_frame: Grabbing frame.


190.00005
.190.00005

MovieWriter.grab_frame: Grabbing frame.



.190.00005


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.


.190.00003


MovieWriter.grab_frame: Grabbing frame.
MovieWriter -- Command stdout:
b''
MovieWriter -- Command stderr:
b"ffmpeg version N-76278-gd897d4c Copyright (c) 2000-2015 the FFmpeg developers\r\n  built with gcc 5.2.0 (GCC)\r\n  configuration: --enable-gpl --enable-version3 --disable-w32threads --enable-avisynth --enable-bzlib --enable-fontconfig --enable-frei0r --enable-gnutls --enable-iconv --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libdcadec --enable-libfreetype --enable-libgme --enable-libgsm --enable-libilbc --enable-libmodplug --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-librtmp --enable-libschroedinger --enable-libsoxr --enable-libspeex --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvo-aacenc --enable-libvo-amrwbenc --enable-libvorbis --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxavs --enable-libxvi

In [26]:
#Presumably discards tests registered by earlier runs of this cell, so that
#only the definition below is collected (ipytest helper -- TODO confirm)
clean_tests()


def test_HeatEquationGPU():
    """
    Run the CPU and GPU solvers side by side for 100 time steps.

    After every step the internal cells of both solutions must agree, and
    the sum over the internal cells must stay equal to its initial value.
    """
    kappa = 1.0
    nx = 10
    dx = 1.0
    dt = 0.4 * dx**2 / (2.0*kappa)

    #Initial condition: value 10 within a quarter of the domain width from the centre
    offsets = (np.arange(nx) - nx/2.0) * dx
    u0 = np.where(np.abs(offsets) < 0.25*dx*nx, 10.0, 0.0).astype(np.float32)
    u_sum = np.sum(u0[1:-1])

    #The CPU solver gets its own copy of the initial data
    cpu_simulator = HeatEquation(u0.copy(), kappa, dx, dt)

    gpu_simulator = HeatEquationGPU()
    gpu_simulator.initialize(u0, kappa, dx, dt)

    for _ in range(100):
        cpu_simulator.step()
        gpu_simulator.step()

        #Compare internal cells only; ghost cells are excluded
        cpu_interior = cpu_simulator.download()[1:-1]
        gpu_interior = gpu_simulator.download()[1:-1]
        print("CPU: ", cpu_interior)
        print("GPU: ", gpu_interior)
        assert cpu_interior == pytest.approx(gpu_interior)

        gpu_total = np.sum(gpu_interior)
        print(u_sum)
        print(gpu_total)
        assert u_sum == pytest.approx(gpu_total)
        assert u_sum == pytest.approx(np.sum(cpu_interior))
        
#Run the tests defined above through pytest with verbose output ('-vvv');
#NOTE(review): the filename is hard-coded and presumably has to match this notebook's file name
run_pytest(filename='14 HeatEquation1D.ipynb', pytest_options=['-vvv'])

platform win32 -- Python 3.6.5, pytest-3.8.0, py-1.5.3, pluggy-0.7.1 -- C:\Users\anbro\AppData\Local\Continuum\anaconda3\python.exe
cachedir: .pytest_cache
rootdir: c:\Users\anbro\Documents\projects\demos\MilanoGPU2018\notebooks, inifile:
plugins: remotedata-0.2.1, openfiles-0.3.0, doctestplus-0.1.3, arraydiff-0.2
collecting ... collected 1 item

14 HeatEquation1D.py::test_HeatEquationGPU <- <ipython-input-26-fae2a9f781e9> PASSED [100%]

  self.config,
  self.config,
  self.config,
  self.config,



0