This commit is contained in:
ton
2023-08-10 10:06:21 +07:00
parent 65bb97baf3
commit a80e9f2621
3 changed files with 512 additions and 441 deletions

View File

@@ -59,7 +59,9 @@ function (kfn::kfn_1)(input::AbstractArray)
kfn.lif_refractoryDuration,
kfn.lif_gammaPd,
kfn.lif_firingCounter,
kfn.lif_recSignal,)
kfn.lif_recSignal,
kfn.lif_subscription,
)
end
@async begin
# project 3D kfn zit into 4D alif zit
@@ -80,6 +82,7 @@ function (kfn::kfn_1)(input::AbstractArray)
kfn.alif_gammaPd,
kfn.alif_firingCounter,
kfn.alif_recSignal,
kfn.alif_subscription,
kfn.alif_epsilonRecA,
kfn.alif_a,
kfn.alif_avth,
@@ -117,7 +120,9 @@ function (kfn::kfn_1)(input::AbstractArray)
kfn.on_refractoryDuration,
kfn.on_gammaPd,
kfn.on_firingCounter,
kfn.on_recSignal,)
kfn.on_recSignal,
kfn.on_subscription,
)
logit = reshape(kfn.on_zt, (size(input, 1), :))
@@ -126,6 +131,434 @@ function (kfn::kfn_1)(input::AbstractArray)
kfn.zit
end
# gpu launcher
# Host-side launcher for the LIF forward kernel.
#
# Compiles the device method of `lifForward` for these arguments without launching it,
# queries a launch configuration for the compiled kernel, then launches it with one
# thread per element of `wRec` and blocks until the GPU has finished.
# Every array is forwarded to the kernel in the order received, followed by
# `GeneralUtils.linear_to_cartesian`.
function lifForward( zit::CuArray,
                     wRec::CuArray,
                     vt::CuArray,
                     vth::CuArray,
                     vRest::CuArray,
                     zt::CuArray,
                     alpha::CuArray,
                     phi::CuArray,
                     epsilonRec::CuArray,
                     refractoryCounter::CuArray,
                     refractoryDuration::CuArray,
                     gammaPd::CuArray,
                     firingCounter::CuArray,
                     recSignal::CuArray,
                     subscription::CuArray,
                    )
    # one argument list, shared by the compilation step and the launch itself
    kernelArgs = (zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
                  refractoryCounter, refractoryDuration, gammaPd, firingCounter,
                  recSignal, subscription, GeneralUtils.linear_to_cartesian)

    compiled = @cuda launch=false lifForward(kernelArgs...)
    launchCfg = launch_configuration(compiled.fun)

    # The grid is rounded up to whole blocks, so it normally contains more threads
    # than there are elements; the kernel itself guards against the surplus ones.
    # 1024 is the per-block cap the original author assumes for most NVIDIA GPUs.
    threadsPerBlock = min(1024, launchCfg.threads)
    nBlocks = cld(length(wRec), threadsPerBlock)    # one thread per element of wRec

    CUDA.@sync compiled(kernelArgs...; threads=threadsPerBlock, blocks=nBlocks)
end
# gpu kernel
# Device kernel: advance one element of the LIF state arrays by one time step.
#
# Each thread derives a global linear index from its block/thread ids and handles the
# element of `wRec` with that index; threads beyond `length(wRec)` return immediately.
# `linear_to_cartesian(i, size(wRec))` must yield the four indices of that element.
# All state arrays are updated in place and the kernel always returns `nothing`.
#
# NOTE(review): the membrane update sums `recSignal[:, :, i3, i4]`, whose other entries
# are written by other threads of the same launch; nothing in this kernel orders those
# writes before the read — confirm this is acceptable.
function lifForward( zit,
                     wRec,
                     vt,
                     vth,
                     vRest,
                     zt,
                     alpha,
                     phi,
                     epsilonRec,
                     refractoryCounter,
                     refractoryDuration,
                     gammaPd,
                     firingCounter,
                     recSignal,
                     subscription,
                     linear_to_cartesian,
                    )
    tid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # surplus threads of the rounded-up grid have no element to work on
    tid > length(wRec) && return nothing

    i1, i2, i3, i4 = linear_to_cartesian(tid, size(wRec))
    I = CartesianIndex(i1, i2, i3, i4)

    if refractoryCounter[I] > 0
        # refractory: count down, emit nothing, potential only leaks
        refractoryCounter[I] -= 1
        recSignal[I] = 0
        zt[I] = 0
        vt[I] = alpha[I] * vt[I]
        phi[I] = 0
    else
        # integrate: leak plus the summed signal of the (i3, i4) slice
        recSignal[I] = wRec[I] * zit[I]
        vt[I] = (alpha[I] * vt[I]) + sum(@view(recSignal[:, :, i3, i4]))

        # decide on the spike before the potential is possibly reset
        fired = vt[I] > vth[I]
        zt[I] = fired ? 1 : 0
        if fired
            refractoryCounter[I] = refractoryDuration[I]
            firingCounter[I] += 1
            vt[I] = vRest[I]
        end

        # phi is evaluated on the post-reset potential; the original comment marks
        # this as an intentional difference from the LIF formula
        phi[I] = (gammaPd[I] / vth[I]) * max(0, 1 - ((vt[I] - vth[I]) / vth[I]))
    end

    # both branches end with the same epsilonRec update
    epsilonRec[I] = (alpha[I] * epsilonRec[I]) + (zit[I] * subscription[I])
    return nothing
end
# gpu launcher
# Host-side launcher for the ALIF forward kernel.
#
# Compiles the device method of `alifForward` for these arguments without launching
# it, queries a launch configuration for the compiled kernel, then launches it with
# one thread per element of `wRec` and blocks until the GPU has finished.
# Every array is forwarded to the kernel in the order received, followed by
# `GeneralUtils.linear_to_cartesian`.
function alifForward( zit::CuArray,
                      wRec::CuArray,
                      vt::CuArray,
                      vth::CuArray,
                      vRest::CuArray,
                      zt::CuArray,
                      alpha::CuArray,
                      phi::CuArray,
                      epsilonRec::CuArray,
                      refractoryCounter::CuArray,
                      refractoryDuration::CuArray,
                      gammaPd::CuArray,
                      firingCounter::CuArray,
                      recSignal::CuArray,
                      subscription::CuArray,
                      epsilonRecA::CuArray,
                      a::CuArray,
                      avth::CuArray,
                      beta::CuArray,
                      rho::CuArray,
                     )
    # one argument list, shared by the compilation step and the launch itself
    kernelArgs = (zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
                  refractoryCounter, refractoryDuration, gammaPd, firingCounter,
                  recSignal, subscription, epsilonRecA, a, avth, beta, rho,
                  GeneralUtils.linear_to_cartesian)

    compiled = @cuda launch=false alifForward(kernelArgs...)
    launchCfg = launch_configuration(compiled.fun)

    # The grid is rounded up to whole blocks, so it normally contains more threads
    # than there are elements; the kernel itself guards against the surplus ones.
    # 1024 is the per-block cap the original author assumes for most NVIDIA GPUs.
    threadsPerBlock = min(1024, launchCfg.threads)
    nBlocks = cld(length(wRec), threadsPerBlock)    # one thread per element of wRec

    CUDA.@sync compiled(kernelArgs...; threads=threadsPerBlock, blocks=nBlocks)
end
# gpu kernel
# Device kernel: advance one element of the ALIF state arrays by one time step.
#
# Each thread derives a global linear index from its block/thread ids and handles the
# element of `wRec` with that index; threads beyond `length(wRec)` return immediately.
# `linear_to_cartesian(i, size(wRec))` must yield the four indices of that element.
# All state arrays are updated in place and the kernel always returns `nothing`.
#
# NOTE(review): the membrane update sums `recSignal[:, :, i3, i4]`, whose other entries
# are written by other threads of the same launch; nothing in this kernel orders those
# writes before the read — confirm this is acceptable.
# NOTE(review): the refractory and non-refractory branches use different expressions
# for `epsilonRecA`; both are kept exactly as found — confirm this is intended.
function alifForward( zit,
                      wRec,
                      vt,
                      vth,
                      vRest,
                      zt,
                      alpha,
                      phi,
                      epsilonRec,
                      refractoryCounter,
                      refractoryDuration,
                      gammaPd,
                      firingCounter,
                      recSignal,
                      subscription,
                      epsilonRecA,
                      a,
                      avth,
                      beta,
                      rho,
                      linear_to_cartesian,
                     )
    tid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # surplus threads of the rounded-up grid have no element to work on
    tid > length(wRec) && return nothing

    i1, i2, i3, i4 = linear_to_cartesian(tid, size(wRec))
    I = CartesianIndex(i1, i2, i3, i4)

    if refractoryCounter[I] > 0
        # refractory: count down, emit nothing, potential and adaptation only decay
        refractoryCounter[I] -= 1
        recSignal[I] = 0
        zt[I] = 0
        vt[I] = alpha[I] * vt[I]
        phi[I] = 0
        a[I] = rho[I] * a[I]

        epsilonRec[I] = (alpha[I] * epsilonRec[I]) + (zit[I] * subscription[I])
        # phi was just cleared, but the expression is kept in its full form
        epsilonRecA[I] = (phi[I] * epsilonRec[I]) +
                         ((rho[I] - (phi[I] * beta[I])) * epsilonRecA[I])
        # adaptive threshold from the already-decayed adaptation variable
        avth[I] = vth[I] + (beta[I] * a[I])
    else
        # integrate: leak plus the summed signal of the (i3, i4) slice
        recSignal[I] = zit[I] * wRec[I]
        vt[I] = (alpha[I] * vt[I]) + sum(@view(recSignal[:, :, i3, i4]))

        # adaptive threshold from the adaptation variable of the previous step
        avth[I] = vth[I] + (beta[I] * a[I])

        # decide on the spike before the potential is possibly reset
        fired = vt[I] > avth[I]
        zt[I] = fired ? 1 : 0
        decayed = rho[I] * a[I]
        if fired
            refractoryCounter[I] = refractoryDuration[I]
            firingCounter[I] += 1
            vt[I] = vRest[I]
            a[I] = decayed + 1
        else
            a[I] = decayed
        end

        # phi uses the fixed threshold vth and the post-reset potential; the original
        # comment marks this as an intentional difference from the ALIF formula
        phi[I] = (gammaPd[I] / vth[I]) * max(0, 1 - ((vt[I] - vth[I]) / vth[I]))

        epsilonRec[I] = (alpha[I] * epsilonRec[I]) + (zit[I] * subscription[I])
        # original comment: "use eq.26"
        epsilonRecA[I] = (rho[I] * (phi[I] * epsilonRecA[I])) +
                         (zit[I] * subscription[I])
    end
    return nothing
end
# gpu launcher
# Host-side launcher for the output-neuron forward kernel.
#
# Compiles the device method of `onForward` for these arguments without launching it,
# queries a launch configuration for the compiled kernel, then launches it with one
# thread per element of `wOut` and blocks until the GPU has finished.
# Every array is forwarded to the kernel in the order received, followed by
# `GeneralUtils.linear_to_cartesian`.
function onForward( zit::CuArray,
                    wOut::CuArray,
                    vt::CuArray,
                    vth::CuArray,
                    vRest::CuArray,
                    zt::CuArray,
                    alpha::CuArray,
                    phi::CuArray,
                    epsilonRec::CuArray,
                    refractoryCounter::CuArray,
                    refractoryDuration::CuArray,
                    gammaPd::CuArray,
                    firingCounter::CuArray,
                    recSignal::CuArray,
                    subscription::CuArray,
                   )
    # one argument list, shared by the compilation step and the launch itself
    kernelArgs = (zit, wOut, vt, vth, vRest, zt, alpha, phi, epsilonRec,
                  refractoryCounter, refractoryDuration, gammaPd, firingCounter,
                  recSignal, subscription, GeneralUtils.linear_to_cartesian)

    compiled = @cuda launch=false onForward(kernelArgs...)
    launchCfg = launch_configuration(compiled.fun)

    # The grid is rounded up to whole blocks, so it normally contains more threads
    # than there are elements; the kernel itself guards against the surplus ones.
    # 1024 is the per-block cap the original author assumes for most NVIDIA GPUs.
    threadsPerBlock = min(1024, launchCfg.threads)
    nBlocks = cld(length(wOut), threadsPerBlock)    # one thread per element of wOut

    CUDA.@sync compiled(kernelArgs...; threads=threadsPerBlock, blocks=nBlocks)
end
# gpu kernel
# Device kernel: advance one element of the output-neuron state arrays by one step.
#
# Each thread derives a global linear index from its block/thread ids and handles the
# element of `wOut` with that index; threads beyond `length(wOut)` return immediately.
# `linear_to_cartesian(i, size(wOut))` must yield the four indices of that element.
# All state arrays are updated in place and the kernel always returns `nothing`.
#
# NOTE(review): the membrane update sums `recSignal[:, :, i3, i4]`, whose other entries
# are written by other threads of the same launch; nothing in this kernel orders those
# writes before the read — confirm this is acceptable.
function onForward( zit,
                    wOut,
                    vt,
                    vth,
                    vRest,
                    zt,
                    alpha,
                    phi,
                    epsilonRec,
                    refractoryCounter,
                    refractoryDuration,
                    gammaPd,
                    firingCounter,
                    recSignal,
                    subscription,
                    linear_to_cartesian,
                   )
    tid = threadIdx().x + (blockIdx().x - 1) * blockDim().x
    # surplus threads of the rounded-up grid have no element to work on
    tid > length(wOut) && return nothing

    i1, i2, i3, i4 = linear_to_cartesian(tid, size(wOut))
    I = CartesianIndex(i1, i2, i3, i4)

    if refractoryCounter[I] > 0
        # refractory: count down, emit nothing, potential only leaks
        refractoryCounter[I] -= 1
        recSignal[I] = 0
        zt[I] = 0
        vt[I] = alpha[I] * vt[I]
        phi[I] = 0
    else
        # integrate: leak plus the summed signal of the (i3, i4) slice
        recSignal[I] = zit[I] * wOut[I]
        vt[I] = (alpha[I] * vt[I]) + sum(@view(recSignal[:, :, i3, i4]))

        # decide on the spike before the potential is possibly reset
        fired = vt[I] > vth[I]
        zt[I] = fired ? 1 : 0
        if fired
            refractoryCounter[I] = refractoryDuration[I]
            firingCounter[I] += 1
            vt[I] = vRest[I]
        end

        # phi is evaluated on the post-reset potential; the original comment marks
        # this as an intentional difference from the reference formula
        phi[I] = (gammaPd[I] / vth[I]) * max(0, 1 - ((vt[I] - vth[I]) / vth[I]))
    end

    # both branches end with the same epsilonRec update
    epsilonRec[I] = (alpha[I] * epsilonRec[I]) + (zit[I] * subscription[I])
    return nothing
end
function lifForward(kfn_zit::Array{T},
zit::Array{T},
wRec::Array{T},
@@ -193,127 +626,6 @@ function lifForward(kfn_zit::Array{T},
end
end
# gpu launcher
# Host-side launcher for the LIF forward kernel (variant without `subscription`).
#
# Compiles the device method of `lifForward` for these arguments without launching it,
# queries a launch configuration for the compiled kernel, then launches it with one
# thread per element of `lif_wRec` and blocks until the GPU has finished.
# Every array is forwarded to the kernel in the order received, followed by
# `GeneralUtils.linear_to_cartesian`.
function lifForward( lif_zit::CuArray,
                     lif_wRec::CuArray,
                     lif_vt::CuArray,
                     lif_vth::CuArray,
                     lif_vRest::CuArray,
                     lif_zt::CuArray,
                     lif_alpha::CuArray,
                     lif_phi::CuArray,
                     lif_epsilonRec::CuArray,
                     lif_refractoryCounter::CuArray,
                     lif_refractoryDuration::CuArray,
                     lif_gammaPd::CuArray,
                     lif_firingCounter::CuArray,
                     lif_recSignal::CuArray,)
    # one argument list, shared by the compilation step and the launch itself
    kernelArgs = (lif_zit, lif_wRec, lif_vt, lif_vth, lif_vRest, lif_zt, lif_alpha,
                  lif_phi, lif_epsilonRec, lif_refractoryCounter,
                  lif_refractoryDuration, lif_gammaPd, lif_firingCounter,
                  lif_recSignal, GeneralUtils.linear_to_cartesian)

    compiled = @cuda launch=false lifForward(kernelArgs...)
    launchCfg = launch_configuration(compiled.fun)

    # The grid is rounded up to whole blocks, so it normally contains more threads
    # than there are elements; the kernel itself guards against the surplus ones.
    # 1024 is the per-block cap the original author assumes for most NVIDIA GPUs.
    threadsPerBlock = min(1024, launchCfg.threads)
    nBlocks = cld(length(lif_wRec), threadsPerBlock)    # one thread per element

    CUDA.@sync compiled(kernelArgs...; threads=threadsPerBlock, blocks=nBlocks)
end
# gpu kernel
# Device kernel: advance one element of the LIF state arrays by one time step
# (variant without `subscription`).
#
# Each thread derives a global linear index from its block/thread ids and handles the
# element of `wRec` with that index; threads beyond `length(wRec)` return immediately.
# `linear_to_cartesian(i, size(wRec))` must yield the four indices of that element.
# All state arrays are updated in place and the kernel always returns `nothing`.
#
# Fixes relative to the previous revision of this method:
#   * `refractoryCounter` is decremented once per step and only while it is positive.
#     It used to be decremented unconditionally and then a second time inside the
#     refractory branch, which ran the refractory period at double speed and drove
#     idle counters ever further below zero.
#   * `recSignal` is cleared while refractory, so the slice sum taken for the
#     membrane update no longer picks up a value left over from an earlier step.
#
# NOTE(review): the membrane update sums `recSignal[:, :, i3, i4]`, whose other entries
# are written by other threads of the same launch; nothing in this kernel orders those
# writes before the read — confirm this is acceptable.
function lifForward( zit,
                     wRec,
                     vt,
                     vth,
                     vRest,
                     zt,
                     alpha,
                     phi,
                     epsilonRec,
                     refractoryCounter,
                     refractoryDuration,
                     gammaPd,
                     firingCounter,
                     recSignal,
                     linear_to_cartesian)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # gpu threads index
    # surplus threads of the rounded-up grid have no element to work on
    i <= length(wRec) || return nothing

    i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
    I = CartesianIndex(i1, i2, i3, i4)

    if refractoryCounter[I] > 0 # refractory period is active
        refractoryCounter[I] -= 1
        recSignal[I] = 0        # a silent element contributes nothing to the sum
        zt[I] = 0
        vt[I] = alpha[I] * vt[I]
        phi[I] = 0
    else # refractory period is inactive
        recSignal[I] = zit[I] * wRec[I]
        vt[I] = (alpha[I] * vt[I]) + sum(@view(recSignal[:, :, i3, i4]))

        # fires if membrane potential exceeds threshold
        if vt[I] > vth[I]
            zt[I] = 1
            refractoryCounter[I] = refractoryDuration[I]
            firingCounter[I] += 1
            vt[I] = vRest[I]
        else
            zt[I] = 0
        end

        # phi is evaluated on the post-reset potential; the original comment marks
        # this as an intentional difference from the LIF formula
        phi[I] = (gammaPd[I] / vth[I]) * max(0, 1 - ((vt[I] - vth[I]) / vth[I]))
    end

    # both branches end with the same epsilonRec update
    epsilonRec[I] = (alpha[I] * epsilonRec[I]) + zit[I]
    return nothing
end
function alifForward(zit::Array{T},
wRec::Array{T},
vt0::Array{T},
@@ -413,164 +725,6 @@ function alifForward(zit::Array{T},
end
end
# gpu launcher
# Host-side launcher for the ALIF forward kernel (variant without `subscription`).
#
# Compiles the device method of `alifForward` for these arguments without launching
# it, queries a launch configuration for the compiled kernel, then launches it with
# one thread per element of `alif_wRec` and blocks until the GPU has finished.
# Every array is forwarded to the kernel in the order received, followed by
# `GeneralUtils.linear_to_cartesian`.
function alifForward( alif_zit::CuArray,
                      alif_wRec::CuArray,
                      alif_vt::CuArray,
                      alif_vth::CuArray,
                      alif_vRest::CuArray,
                      alif_zt::CuArray,
                      alif_alpha::CuArray,
                      alif_phi::CuArray,
                      alif_epsilonRec::CuArray,
                      alif_refractoryCounter::CuArray,
                      alif_refractoryDuration::CuArray,
                      alif_gammaPd::CuArray,
                      alif_firingCounter::CuArray,
                      alif_recSignal::CuArray,
                      alif_epsilonRecA::CuArray,
                      alif_a::CuArray,
                      alif_avth::CuArray,
                      alif_beta::CuArray,
                      alif_rho::CuArray,
                     )
    # one argument list, shared by the compilation step and the launch itself
    kernelArgs = (alif_zit, alif_wRec, alif_vt, alif_vth, alif_vRest, alif_zt,
                  alif_alpha, alif_phi, alif_epsilonRec, alif_refractoryCounter,
                  alif_refractoryDuration, alif_gammaPd, alif_firingCounter,
                  alif_recSignal, alif_epsilonRecA, alif_a, alif_avth, alif_beta,
                  alif_rho, GeneralUtils.linear_to_cartesian)

    compiled = @cuda launch=false alifForward(kernelArgs...)
    launchCfg = launch_configuration(compiled.fun)

    # The grid is rounded up to whole blocks, so it normally contains more threads
    # than there are elements; the kernel itself guards against the surplus ones.
    # 1024 is the per-block cap the original author assumes for most NVIDIA GPUs.
    threadsPerBlock = min(1024, launchCfg.threads)
    nBlocks = cld(length(alif_wRec), threadsPerBlock)    # one thread per element

    CUDA.@sync compiled(kernelArgs...; threads=threadsPerBlock, blocks=nBlocks)
end
# gpu kernel
# Device kernel: advance one element of the ALIF state arrays by one time step
# (variant without `subscription`).
#
# Each thread derives a global linear index from its block/thread ids and handles the
# element of `wRec` with that index; threads beyond `length(wRec)` return immediately.
# `linear_to_cartesian(i, size(wRec))` must yield the four indices of that element.
# All state arrays are updated in place and the kernel always returns `nothing`.
#
# Fixes relative to the previous revision of this method:
#   * `refractoryCounter` is decremented once per step and only while it is positive.
#     It used to be decremented unconditionally and then a second time inside the
#     refractory branch, which ran the refractory period at double speed and drove
#     idle counters ever further below zero.
#   * `recSignal` is cleared while refractory, so the slice sum taken for the
#     membrane update no longer picks up a value left over from an earlier step.
#
# NOTE(review): the membrane update sums `recSignal[:, :, i3, i4]`, whose other entries
# are written by other threads of the same launch; nothing in this kernel orders those
# writes before the read — confirm this is acceptable.
function alifForward( zit,
                      wRec,
                      vt,
                      vth,
                      vRest,
                      zt,
                      alpha,
                      phi,
                      epsilonRec,
                      refractoryCounter,
                      refractoryDuration,
                      gammaPd,
                      firingCounter,
                      recSignal,
                      epsilonRecA,
                      a,
                      avth,
                      beta,
                      rho,
                      linear_to_cartesian)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # gpu threads index
    # surplus threads of the rounded-up grid have no element to work on
    i <= length(wRec) || return nothing

    i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
    I = CartesianIndex(i1, i2, i3, i4)

    if refractoryCounter[I] > 0 # refractory period is active
        refractoryCounter[I] -= 1
        recSignal[I] = 0        # a silent element contributes nothing to the sum
        zt[I] = 0
        vt[I] = alpha[I] * vt[I]
        phi[I] = 0
        a[I] = rho[I] * a[I]
        # adaptive threshold from the already-decayed adaptation variable
        avth[I] = vth[I] + (beta[I] * a[I])
    else # refractory period is inactive
        recSignal[I] = zit[I] * wRec[I]
        vt[I] = (alpha[I] * vt[I]) + sum(@view(recSignal[:, :, i3, i4]))

        # adaptive threshold from the adaptation variable of the previous step
        avth[I] = vth[I] + (beta[I] * a[I])

        # fires if membrane potential exceeds the adaptive threshold
        if vt[I] > avth[I]
            zt[I] = 1
            refractoryCounter[I] = refractoryDuration[I]
            firingCounter[I] += 1
            vt[I] = vRest[I]
            a[I] = (rho[I] * a[I]) + 1
        else
            zt[I] = 0
            a[I] = rho[I] * a[I]
        end

        # phi uses the fixed threshold vth and the post-reset potential; the original
        # comment marks this as an intentional difference from the ALIF formula
        phi[I] = (gammaPd[I] / vth[I]) * max(0, 1 - ((vt[I] - vth[I]) / vth[I]))
    end

    # both branches end with the same two trace updates; epsilonRecA is computed from
    # the freshly updated phi and epsilonRec
    epsilonRec[I] = (alpha[I] * epsilonRec[I]) + zit[I]
    epsilonRecA[I] = (phi[I] * epsilonRec[I]) +
                     ((rho[I] - (phi[I] * beta[I])) * epsilonRecA[I])
    return nothing
end
function onForward(kfn_zit::Array{T},
zit::Array{T},
wOut::Array{T},
@@ -638,133 +792,6 @@ function onForward(kfn_zit::Array{T},
end
end
# gpu launcher
# Host-side launcher for the output-neuron forward kernel (variant without
# `subscription`).
#
# Compiles the device method of `onForward` for these arguments without launching it,
# queries a launch configuration for the compiled kernel, then launches it with one
# thread per element of `on_wOut` and blocks until the GPU has finished.
# Every array is forwarded to the kernel in the order received, followed by
# `GeneralUtils.linear_to_cartesian`.
function onForward( on_zit::CuArray,
                    on_wOut::CuArray,
                    on_vt::CuArray,
                    on_vth::CuArray,
                    on_vRest::CuArray,
                    on_zt::CuArray,
                    on_alpha::CuArray,
                    on_phi::CuArray,
                    on_epsilonRec::CuArray,
                    on_refractoryCounter::CuArray,
                    on_refractoryDuration::CuArray,
                    on_gammaPd::CuArray,
                    on_firingCounter::CuArray,
                    on_recSignal::CuArray)
    # one argument list, shared by the compilation step and the launch itself
    kernelArgs = (on_zit, on_wOut, on_vt, on_vth, on_vRest, on_zt, on_alpha, on_phi,
                  on_epsilonRec, on_refractoryCounter, on_refractoryDuration,
                  on_gammaPd, on_firingCounter, on_recSignal,
                  GeneralUtils.linear_to_cartesian)

    compiled = @cuda launch=false onForward(kernelArgs...)
    launchCfg = launch_configuration(compiled.fun)

    # The grid is rounded up to whole blocks, so it normally contains more threads
    # than there are elements; the kernel itself guards against the surplus ones.
    # 1024 is the per-block cap the original author assumes for most NVIDIA GPUs.
    threadsPerBlock = min(1024, launchCfg.threads)
    nBlocks = cld(length(on_wOut), threadsPerBlock)    # one thread per element

    CUDA.@sync compiled(kernelArgs...; threads=threadsPerBlock, blocks=nBlocks)
end
# gpu kernel
# Device kernel: advance one element of the output-neuron state arrays by one step
# (variant without `subscription`).
#
# Each thread derives a global linear index from its block/thread ids and handles the
# element of `wOut` with that index; threads beyond `length(wOut)` return immediately.
# `linear_to_cartesian(i, size(wOut))` must yield the four indices of that element.
# All state arrays are updated in place and the kernel always returns `nothing`.
#
# Fixes relative to the previous revision of this method:
#   * `refractoryCounter` is decremented once per step and only while it is positive.
#     It used to be decremented unconditionally and then a second time inside the
#     refractory branch, which ran the refractory period at double speed and drove
#     idle counters ever further below zero.
#   * `recSignal` is cleared while refractory, so the slice sum taken for the
#     membrane update no longer picks up a value left over from an earlier step.
#
# NOTE(review): the membrane update sums `recSignal[:, :, i3, i4]`, whose other entries
# are written by other threads of the same launch; nothing in this kernel orders those
# writes before the read — confirm this is acceptable.
function onForward( zit,
                    wOut,
                    vt,
                    vth,
                    vRest,
                    zt,
                    alpha,
                    phi,
                    epsilonRec,
                    refractoryCounter,
                    refractoryDuration,
                    gammaPd,
                    firingCounter,
                    recSignal,
                    linear_to_cartesian)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # gpu threads index
    # surplus threads of the rounded-up grid have no element to work on
    i <= length(wOut) || return nothing

    i1, i2, i3, i4 = linear_to_cartesian(i, size(wOut))
    I = CartesianIndex(i1, i2, i3, i4)

    if refractoryCounter[I] > 0 # refractory period is active
        refractoryCounter[I] -= 1
        recSignal[I] = 0        # a silent element contributes nothing to the sum
        zt[I] = 0
        vt[I] = alpha[I] * vt[I]
        phi[I] = 0
    else # refractory period is inactive
        recSignal[I] = zit[I] * wOut[I]
        vt[I] = (alpha[I] * vt[I]) + sum(@view(recSignal[:, :, i3, i4]))

        # fires if membrane potential exceeds threshold
        if vt[I] > vth[I]
            zt[I] = 1
            refractoryCounter[I] = refractoryDuration[I]
            firingCounter[I] += 1
            vt[I] = vRest[I]
        else
            zt[I] = 0
        end

        # phi is evaluated on the post-reset potential; the original comment marks
        # this as an intentional difference from the reference formula
        phi[I] = (gammaPd[I] / vth[I]) * max(0, 1 - ((vt[I] - vth[I]) / vth[I]))
    end

    # both branches end with the same epsilonRec update
    epsilonRec[I] = (alpha[I] * epsilonRec[I]) + zit[I]
    return nothing
end