fix algo
This commit is contained in:
843
src/forward.jl
843
src/forward.jl
@@ -59,7 +59,9 @@ function (kfn::kfn_1)(input::AbstractArray)
|
||||
kfn.lif_refractoryDuration,
|
||||
kfn.lif_gammaPd,
|
||||
kfn.lif_firingCounter,
|
||||
kfn.lif_recSignal,)
|
||||
kfn.lif_recSignal,
|
||||
kfn.lif_subscription,
|
||||
)
|
||||
end
|
||||
@async begin
|
||||
# project 3D kfn zit into 4D alif zit
|
||||
@@ -80,6 +82,7 @@ function (kfn::kfn_1)(input::AbstractArray)
|
||||
kfn.alif_gammaPd,
|
||||
kfn.alif_firingCounter,
|
||||
kfn.alif_recSignal,
|
||||
kfn.alif_subscription,
|
||||
kfn.alif_epsilonRecA,
|
||||
kfn.alif_a,
|
||||
kfn.alif_avth,
|
||||
@@ -117,7 +120,9 @@ function (kfn::kfn_1)(input::AbstractArray)
|
||||
kfn.on_refractoryDuration,
|
||||
kfn.on_gammaPd,
|
||||
kfn.on_firingCounter,
|
||||
kfn.on_recSignal,)
|
||||
kfn.on_recSignal,
|
||||
kfn.on_subscription,
|
||||
)
|
||||
|
||||
logit = reshape(kfn.on_zt, (size(input, 1), :))
|
||||
|
||||
@@ -126,6 +131,434 @@ function (kfn::kfn_1)(input::AbstractArray)
|
||||
kfn.zit
|
||||
end
|
||||
|
||||
# gpu launcher
|
||||
"""
    lifForward(zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
               refractoryCounter, refractoryDuration, gammaPd, firingCounter,
               recSignal, subscription)

Host-side launcher for the `lifForward` GPU kernel (the 16-argument method of the same
name, which additionally receives `GeneralUtils.linear_to_cartesian`).

The kernel is compiled without being launched, its launch configuration is queried, and
it is then started with `min(1024, config.threads)` threads per block and enough blocks
to provide at least one thread per element of `wRec`. The launch is wrapped in
`CUDA.@sync`, so this function only returns after the kernel has finished.

All arguments are `CuArray`s that are forwarded to the kernel unchanged.

Returns `nothing` without launching anything when `wRec` is empty; otherwise returns the
value of the synchronized launch expression.
"""
function lifForward(
    zit::CuArray, wRec::CuArray, vt::CuArray, vth::CuArray, vRest::CuArray,
    zt::CuArray, alpha::CuArray, phi::CuArray, epsilonRec::CuArray,
    refractoryCounter::CuArray, refractoryDuration::CuArray, gammaPd::CuArray,
    firingCounter::CuArray, recSignal::CuArray, subscription::CuArray,
)
    # Total desired threads: one per element of wRec. With no elements there is no work,
    # and a launch with zero blocks is not a valid CUDA configuration, so stop here.
    totalThreads = length(wRec)
    totalThreads == 0 && return nothing

    # Compile only; the kernel object is launched further down.
    kernel = @cuda launch=false lifForward(
        zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
        refractoryCounter, refractoryDuration, gammaPd, firingCounter,
        recSignal, subscription, GeneralUtils.linear_to_cartesian,
    )
    config = launch_configuration(kernel.fun)

    # The exact number of threads cannot be requested, so more threads than needed are
    # launched and the kernel guards against the surplus ones touching memory.
    threads = min(1024, config.threads)
    blocks = cld(totalThreads, threads)

    CUDA.@sync begin
        kernel(
            zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
            refractoryCounter, refractoryDuration, gammaPd, firingCounter,
            recSignal, subscription, GeneralUtils.linear_to_cartesian; threads, blocks)
    end
end
|
||||
|
||||
# gpu kernel
|
||||
"""
    lifForward(zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
               refractoryCounter, refractoryDuration, gammaPd, firingCounter,
               recSignal, subscription, linear_to_cartesian)

GPU kernel: each thread handles the single element of the 4-index arrays whose linear
index equals the global thread index; threads beyond `length(wRec)` do nothing.

For its element the thread either
- (refractory counter > 0) decrements the counter, zeroes `recSignal`, `zt` and `phi`
  and decays `vt` by `alpha`, or
- (otherwise) stores `wRec * zit` in `recSignal`, sets `vt` to the decayed potential plus
  the sum of `recSignal[:, :, i3, i4]`, emits a spike (`zt = 1`, counter reloaded from
  `refractoryDuration`, `firingCounter` incremented, `vt` reset to `vRest`) when `vt`
  exceeds `vth`, and recomputes `phi`.

In both cases `epsilonRec` is updated to `alpha * epsilonRec + zit * subscription`.

`linear_to_cartesian(i, dims)` must return (at least) four indices for linear index `i`.

NOTE(review): every thread writes its own `recSignal` entry and then sums the whole
`[:, :, i3, i4]` slice; no synchronization between threads is visible in this kernel —
confirm that reading entries other threads may still be updating is acceptable.

Always returns `nothing`.
"""
function lifForward(
    zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec, refractoryCounter,
    refractoryDuration, gammaPd, firingCounter, recSignal, subscription,
    linear_to_cartesian,
)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global gpu thread index

    # surplus threads (launched only to fill the last block) must not touch memory
    i <= length(wRec) || return nothing

    i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))

    if refractoryCounter[i1, i2, i3, i4] > 0
        # refractory period is active: no input is integrated, no spike is emitted
        refractoryCounter[i1, i2, i3, i4] -= 1
        recSignal[i1, i2, i3, i4] = 0
        zt[i1, i2, i3, i4] = 0
        vt[i1, i2, i3, i4] = alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]
        phi[i1, i2, i3, i4] = 0
    else
        # refractory period is inactive: integrate the weighted input
        recSignal[i1, i2, i3, i4] = wRec[i1, i2, i3, i4] * zit[i1, i2, i3, i4]
        vt[i1, i2, i3, i4] = (alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]) +
            sum(view(recSignal, :, :, i3, i4))

        # fires if membrane potential exceeds threshold
        if vt[i1, i2, i3, i4] > vth[i1, i2, i3, i4]
            zt[i1, i2, i3, i4] = 1
            refractoryCounter[i1, i2, i3, i4] = refractoryDuration[i1, i2, i3, i4]
            firingCounter[i1, i2, i3, i4] += 1
            vt[i1, i2, i3, i4] = vRest[i1, i2, i3, i4]
        else
            zt[i1, i2, i3, i4] = 0
        end

        # phi is evaluated on vt *after* a possible reset (differs from the lif formula)
        phi[i1, i2, i3, i4] = (gammaPd[i1, i2, i3, i4] / vth[i1, i2, i3, i4]) *
            max(0, 1 - ((vt[i1, i2, i3, i4] - vth[i1, i2, i3, i4]) / vth[i1, i2, i3, i4]))
    end

    # epsilonRec is updated identically, and last, in both branches
    epsilonRec[i1, i2, i3, i4] = (alpha[i1, i2, i3, i4] * epsilonRec[i1, i2, i3, i4]) +
        (zit[i1, i2, i3, i4] * subscription[i1, i2, i3, i4])

    return nothing
end
|
||||
|
||||
# gpu launcher
|
||||
"""
    alifForward(zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
                refractoryCounter, refractoryDuration, gammaPd, firingCounter,
                recSignal, subscription, epsilonRecA, a, avth, beta, rho)

Host-side launcher for the `alifForward` GPU kernel (the 21-argument method of the same
name, which additionally receives `GeneralUtils.linear_to_cartesian`).

The kernel is compiled without being launched, its launch configuration is queried, and
it is then started with `min(1024, config.threads)` threads per block and enough blocks
to provide at least one thread per element of `wRec`. The launch is wrapped in
`CUDA.@sync`, so this function only returns after the kernel has finished.

All arguments are `CuArray`s that are forwarded to the kernel unchanged.

Returns `nothing` without launching anything when `wRec` is empty; otherwise returns the
value of the synchronized launch expression.
"""
function alifForward(
    zit::CuArray, wRec::CuArray, vt::CuArray, vth::CuArray, vRest::CuArray,
    zt::CuArray, alpha::CuArray, phi::CuArray, epsilonRec::CuArray,
    refractoryCounter::CuArray, refractoryDuration::CuArray, gammaPd::CuArray,
    firingCounter::CuArray, recSignal::CuArray, subscription::CuArray,
    epsilonRecA::CuArray, a::CuArray, avth::CuArray, beta::CuArray, rho::CuArray,
)
    # Total desired threads: one per element of wRec. With no elements there is no work,
    # and a launch with zero blocks is not a valid CUDA configuration, so stop here.
    totalThreads = length(wRec)
    totalThreads == 0 && return nothing

    # Compile only; the kernel object is launched further down.
    kernel = @cuda launch=false alifForward(
        zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
        refractoryCounter, refractoryDuration, gammaPd, firingCounter,
        recSignal, subscription, epsilonRecA, a, avth, beta, rho,
        GeneralUtils.linear_to_cartesian,
    )
    config = launch_configuration(kernel.fun)

    # The exact number of threads cannot be requested, so more threads than needed are
    # launched and the kernel guards against the surplus ones touching memory.
    threads = min(1024, config.threads)
    blocks = cld(totalThreads, threads)

    CUDA.@sync begin
        kernel(
            zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
            refractoryCounter, refractoryDuration, gammaPd, firingCounter,
            recSignal, subscription, epsilonRecA, a, avth, beta, rho,
            GeneralUtils.linear_to_cartesian; threads, blocks)
    end
end
|
||||
|
||||
# gpu kernel
|
||||
"""
    alifForward(zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
                refractoryCounter, refractoryDuration, gammaPd, firingCounter,
                recSignal, subscription, epsilonRecA, a, avth, beta, rho,
                linear_to_cartesian)

GPU kernel: each thread handles the single element of the 4-index arrays whose linear
index equals the global thread index; threads beyond `length(wRec)` do nothing.

For its element the thread either
- (refractory counter > 0) decrements the counter, zeroes `recSignal`, `zt` and `phi`,
  decays `vt` by `alpha` and `a` by `rho`, then updates `epsilonRec`, `epsilonRecA` and
  the adaptive threshold `avth = vth + beta * a`, or
- (otherwise) stores `zit * wRec` in `recSignal`, sets `vt` to the decayed potential plus
  the sum of `recSignal[:, :, i3, i4]`, refreshes `avth`, emits a spike when `vt`
  exceeds `avth` (`zt = 1`, counter reloaded, `firingCounter` incremented, `vt` reset to
  `vRest`, `a = rho * a + 1`) or just decays `a`, and finally recomputes `phi`,
  `epsilonRec` and `epsilonRecA`.

Note that the two branches use different expressions for `epsilonRecA`.

`linear_to_cartesian(i, dims)` must return (at least) four indices for linear index `i`.

NOTE(review): every thread writes its own `recSignal` entry and then sums the whole
`[:, :, i3, i4]` slice; no synchronization between threads is visible in this kernel —
confirm that reading entries other threads may still be updating is acceptable.

Always returns `nothing`.
"""
function alifForward(
    zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec, refractoryCounter,
    refractoryDuration, gammaPd, firingCounter, recSignal, subscription,
    epsilonRecA, a, avth, beta, rho, linear_to_cartesian,
)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global gpu thread index

    # surplus threads (launched only to fill the last block) must not touch memory
    i <= length(wRec) || return nothing

    i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))

    if refractoryCounter[i1, i2, i3, i4] > 0
        # refractory period is active: no input is integrated, no spike is emitted
        refractoryCounter[i1, i2, i3, i4] -= 1
        recSignal[i1, i2, i3, i4] = 0
        zt[i1, i2, i3, i4] = 0
        vt[i1, i2, i3, i4] = alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]
        phi[i1, i2, i3, i4] = 0
        a[i1, i2, i3, i4] = rho[i1, i2, i3, i4] * a[i1, i2, i3, i4]

        # epsilonRec
        epsilonRec[i1, i2, i3, i4] =
            (alpha[i1, i2, i3, i4] * epsilonRec[i1, i2, i3, i4]) +
            (zit[i1, i2, i3, i4] * subscription[i1, i2, i3, i4])

        # epsilonRecA (phi was just set to 0 above, so only the rho term contributes)
        epsilonRecA[i1, i2, i3, i4] =
            (phi[i1, i2, i3, i4] * epsilonRec[i1, i2, i3, i4]) +
            ((rho[i1, i2, i3, i4] - (phi[i1, i2, i3, i4] * beta[i1, i2, i3, i4])) *
             epsilonRecA[i1, i2, i3, i4])

        # adaptive threshold
        avth[i1, i2, i3, i4] = vth[i1, i2, i3, i4] +
            (beta[i1, i2, i3, i4] * a[i1, i2, i3, i4])
    else
        # refractory period is inactive: integrate the weighted input
        recSignal[i1, i2, i3, i4] = zit[i1, i2, i3, i4] * wRec[i1, i2, i3, i4]
        vt[i1, i2, i3, i4] = (alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]) +
            sum(view(recSignal, :, :, i3, i4))

        # adaptive threshold, computed from `a` before this step's update of `a`
        avth[i1, i2, i3, i4] = vth[i1, i2, i3, i4] +
            (beta[i1, i2, i3, i4] * a[i1, i2, i3, i4])

        # fires if membrane potential exceeds the adaptive threshold
        if vt[i1, i2, i3, i4] > avth[i1, i2, i3, i4]
            zt[i1, i2, i3, i4] = 1
            refractoryCounter[i1, i2, i3, i4] = refractoryDuration[i1, i2, i3, i4]
            firingCounter[i1, i2, i3, i4] += 1
            vt[i1, i2, i3, i4] = vRest[i1, i2, i3, i4]
            a[i1, i2, i3, i4] = (rho[i1, i2, i3, i4] * a[i1, i2, i3, i4]) + 1
        else
            zt[i1, i2, i3, i4] = 0
            a[i1, i2, i3, i4] = (rho[i1, i2, i3, i4] * a[i1, i2, i3, i4])
        end

        # phi uses the fixed vth and vt after a possible reset
        # (differs from the alif formula)
        phi[i1, i2, i3, i4] = (gammaPd[i1, i2, i3, i4] / vth[i1, i2, i3, i4]) *
            max(0, 1 - ((vt[i1, i2, i3, i4] - vth[i1, i2, i3, i4]) / vth[i1, i2, i3, i4]))

        # epsilonRec
        epsilonRec[i1, i2, i3, i4] =
            (alpha[i1, i2, i3, i4] * epsilonRec[i1, i2, i3, i4]) +
            (zit[i1, i2, i3, i4] * subscription[i1, i2, i3, i4])

        # epsilonRecA, use eq.26
        epsilonRecA[i1, i2, i3, i4] =
            (rho[i1, i2, i3, i4] *
             (phi[i1, i2, i3, i4] * epsilonRecA[i1, i2, i3, i4])) +
            (zit[i1, i2, i3, i4] * subscription[i1, i2, i3, i4])
    end

    return nothing
end
|
||||
|
||||
# gpu launcher
|
||||
"""
    onForward(zit, wOut, vt, vth, vRest, zt, alpha, phi, epsilonRec,
              refractoryCounter, refractoryDuration, gammaPd, firingCounter,
              recSignal, subscription)

Host-side launcher for the `onForward` GPU kernel (the 16-argument method of the same
name, which additionally receives `GeneralUtils.linear_to_cartesian`).

The kernel is compiled without being launched, its launch configuration is queried, and
it is then started with `min(1024, config.threads)` threads per block and enough blocks
to provide at least one thread per element of `wOut`. The launch is wrapped in
`CUDA.@sync`, so this function only returns after the kernel has finished.

All arguments are `CuArray`s that are forwarded to the kernel unchanged.

Returns `nothing` without launching anything when `wOut` is empty; otherwise returns the
value of the synchronized launch expression.
"""
function onForward(
    zit::CuArray, wOut::CuArray, vt::CuArray, vth::CuArray, vRest::CuArray,
    zt::CuArray, alpha::CuArray, phi::CuArray, epsilonRec::CuArray,
    refractoryCounter::CuArray, refractoryDuration::CuArray, gammaPd::CuArray,
    firingCounter::CuArray, recSignal::CuArray, subscription::CuArray,
)
    # Total desired threads: one per element of wOut. With no elements there is no work,
    # and a launch with zero blocks is not a valid CUDA configuration, so stop here.
    totalThreads = length(wOut)
    totalThreads == 0 && return nothing

    # Compile only; the kernel object is launched further down.
    kernel = @cuda launch=false onForward(
        zit, wOut, vt, vth, vRest, zt, alpha, phi, epsilonRec,
        refractoryCounter, refractoryDuration, gammaPd, firingCounter,
        recSignal, subscription, GeneralUtils.linear_to_cartesian,
    )
    config = launch_configuration(kernel.fun)

    # The exact number of threads cannot be requested, so more threads than needed are
    # launched and the kernel guards against the surplus ones touching memory.
    threads = min(1024, config.threads)
    blocks = cld(totalThreads, threads)

    CUDA.@sync begin
        kernel(
            zit, wOut, vt, vth, vRest, zt, alpha, phi, epsilonRec,
            refractoryCounter, refractoryDuration, gammaPd, firingCounter,
            recSignal, subscription, GeneralUtils.linear_to_cartesian; threads, blocks)
    end
end
|
||||
|
||||
# gpu kernel
|
||||
"""
    onForward(zit, wOut, vt, vth, vRest, zt, alpha, phi, epsilonRec,
              refractoryCounter, refractoryDuration, gammaPd, firingCounter,
              recSignal, subscription, linear_to_cartesian)

GPU kernel: each thread handles the single element of the 4-index arrays whose linear
index equals the global thread index; threads beyond `length(wOut)` do nothing.

For its element the thread either
- (refractory counter > 0) decrements the counter, zeroes `recSignal`, `zt` and `phi`
  and decays `vt` by `alpha`, or
- (otherwise) stores `zit * wOut` in `recSignal`, sets `vt` to the decayed potential plus
  the sum of `recSignal[:, :, i3, i4]`, emits a spike (`zt = 1`, counter reloaded from
  `refractoryDuration`, `firingCounter` incremented, `vt` reset to `vRest`) when `vt`
  exceeds `vth`, and recomputes `phi`.

In both cases `epsilonRec` is updated to `alpha * epsilonRec + zit * subscription`.

`linear_to_cartesian(i, dims)` must return (at least) four indices for linear index `i`.

NOTE(review): every thread writes its own `recSignal` entry and then sums the whole
`[:, :, i3, i4]` slice; no synchronization between threads is visible in this kernel —
confirm that reading entries other threads may still be updating is acceptable.

Always returns `nothing`.
"""
function onForward(
    zit, wOut, vt, vth, vRest, zt, alpha, phi, epsilonRec, refractoryCounter,
    refractoryDuration, gammaPd, firingCounter, recSignal, subscription,
    linear_to_cartesian,
)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global gpu thread index

    # surplus threads (launched only to fill the last block) must not touch memory
    i <= length(wOut) || return nothing

    i1, i2, i3, i4 = linear_to_cartesian(i, size(wOut))

    if refractoryCounter[i1, i2, i3, i4] > 0
        # refractory period is active: no input is integrated, no spike is emitted
        refractoryCounter[i1, i2, i3, i4] -= 1
        recSignal[i1, i2, i3, i4] = 0
        zt[i1, i2, i3, i4] = 0
        vt[i1, i2, i3, i4] = alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]
        phi[i1, i2, i3, i4] = 0
    else
        # refractory period is inactive: integrate the weighted input
        recSignal[i1, i2, i3, i4] = zit[i1, i2, i3, i4] * wOut[i1, i2, i3, i4]
        vt[i1, i2, i3, i4] = (alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]) +
            sum(view(recSignal, :, :, i3, i4))

        # fires if membrane potential exceeds threshold
        if vt[i1, i2, i3, i4] > vth[i1, i2, i3, i4]
            zt[i1, i2, i3, i4] = 1
            refractoryCounter[i1, i2, i3, i4] = refractoryDuration[i1, i2, i3, i4]
            firingCounter[i1, i2, i3, i4] += 1
            vt[i1, i2, i3, i4] = vRest[i1, i2, i3, i4]
        else
            zt[i1, i2, i3, i4] = 0
        end

        # phi is evaluated on vt *after* a possible reset (differs from the on formula)
        phi[i1, i2, i3, i4] = (gammaPd[i1, i2, i3, i4] / vth[i1, i2, i3, i4]) *
            max(0, 1 - ((vt[i1, i2, i3, i4] - vth[i1, i2, i3, i4]) / vth[i1, i2, i3, i4]))
    end

    # epsilonRec is updated identically, and last, in both branches
    epsilonRec[i1, i2, i3, i4] = (alpha[i1, i2, i3, i4] * epsilonRec[i1, i2, i3, i4]) +
        (zit[i1, i2, i3, i4] * subscription[i1, i2, i3, i4])

    return nothing
end
|
||||
|
||||
function lifForward(kfn_zit::Array{T},
|
||||
zit::Array{T},
|
||||
wRec::Array{T},
|
||||
@@ -193,127 +626,6 @@ function lifForward(kfn_zit::Array{T},
|
||||
end
|
||||
end
|
||||
|
||||
# gpu launcher
|
||||
"""
    lifForward(lif_zit, lif_wRec, lif_vt, lif_vth, lif_vRest, lif_zt, lif_alpha,
               lif_phi, lif_epsilonRec, lif_refractoryCounter,
               lif_refractoryDuration, lif_gammaPd, lif_firingCounter, lif_recSignal)

Host-side launcher for the 15-argument `lifForward` GPU kernel (the fourteen arrays given
here plus `GeneralUtils.linear_to_cartesian`).

The kernel is compiled without being launched, its launch configuration is queried, and
it is then started with `min(1024, config.threads)` threads per block and enough blocks
to provide at least one thread per element of `lif_wRec`. The launch is wrapped in
`CUDA.@sync`, so this function only returns after the kernel has finished.

Returns `nothing` without launching anything when `lif_wRec` is empty; otherwise returns
the value of the synchronized launch expression.
"""
function lifForward(
    lif_zit::CuArray, lif_wRec::CuArray, lif_vt::CuArray, lif_vth::CuArray,
    lif_vRest::CuArray, lif_zt::CuArray, lif_alpha::CuArray, lif_phi::CuArray,
    lif_epsilonRec::CuArray, lif_refractoryCounter::CuArray,
    lif_refractoryDuration::CuArray, lif_gammaPd::CuArray,
    lif_firingCounter::CuArray, lif_recSignal::CuArray,
)
    # Total desired threads: one per element of lif_wRec. With no elements there is no
    # work, and a launch with zero blocks is not a valid CUDA configuration.
    totalThreads = length(lif_wRec)
    totalThreads == 0 && return nothing

    # Compile only; the kernel object is launched further down.
    kernel = @cuda launch=false lifForward(
        lif_zit, lif_wRec, lif_vt, lif_vth, lif_vRest, lif_zt, lif_alpha, lif_phi,
        lif_epsilonRec, lif_refractoryCounter, lif_refractoryDuration, lif_gammaPd,
        lif_firingCounter, lif_recSignal, GeneralUtils.linear_to_cartesian,
    )
    config = launch_configuration(kernel.fun)

    # The exact number of threads cannot be requested, so more threads than needed are
    # launched and the kernel guards against the surplus ones touching memory.
    threads = min(1024, config.threads)
    blocks = cld(totalThreads, threads)

    CUDA.@sync begin
        kernel(
            lif_zit, lif_wRec, lif_vt, lif_vth, lif_vRest, lif_zt, lif_alpha, lif_phi,
            lif_epsilonRec, lif_refractoryCounter, lif_refractoryDuration, lif_gammaPd,
            lif_firingCounter, lif_recSignal,
            GeneralUtils.linear_to_cartesian; threads, blocks)
    end
end
|
||||
|
||||
# gpu kernel
|
||||
"""
    lifForward(zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
               refractoryCounter, refractoryDuration, gammaPd, firingCounter,
               recSignal, linear_to_cartesian)

GPU kernel (variant without a `subscription` mask): each thread handles the single
element of the 4-index arrays whose linear index equals the global thread index; threads
beyond `length(wRec)` do nothing.

For its element the thread either
- (refractory counter > 0) decrements the counter, zeroes `recSignal`, `zt` and `phi`
  and decays `vt` by `alpha`, or
- (otherwise) stores `zit * wRec` in `recSignal`, sets `vt` to the decayed potential plus
  the sum of `recSignal[:, :, i3, i4]`, emits a spike (`zt = 1`, counter reloaded from
  `refractoryDuration`, `firingCounter` incremented, `vt` reset to `vRest`) when `vt`
  exceeds `vth`, and recomputes `phi`.

In both cases `epsilonRec` is updated to `alpha * epsilonRec + zit`.

The refractory counter is decremented only while it is positive, exactly once per call,
so after a spike the element stays refractory for `refractoryDuration` calls and the
counter never drops below zero. `recSignal` is cleared during refractory steps so the
slice sum of neighbouring threads does not pick up a stale contribution.

`linear_to_cartesian(i, dims)` must return (at least) four indices for linear index `i`.

NOTE(review): every thread writes its own `recSignal` entry and then sums the whole
`[:, :, i3, i4]` slice; no synchronization between threads is visible in this kernel —
confirm that reading entries other threads may still be updating is acceptable.

Always returns `nothing`.
"""
function lifForward(
    zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec, refractoryCounter,
    refractoryDuration, gammaPd, firingCounter, recSignal, linear_to_cartesian,
)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global gpu thread index

    # surplus threads (launched only to fill the last block) must not touch memory
    i <= length(wRec) || return nothing

    i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))

    if refractoryCounter[i1, i2, i3, i4] > 0
        # refractory period is active: count down once per call
        refractoryCounter[i1, i2, i3, i4] -= 1
        recSignal[i1, i2, i3, i4] = 0
        zt[i1, i2, i3, i4] = 0
        vt[i1, i2, i3, i4] = alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]
        phi[i1, i2, i3, i4] = 0
    else
        # refractory period is inactive: integrate the weighted input
        recSignal[i1, i2, i3, i4] = zit[i1, i2, i3, i4] * wRec[i1, i2, i3, i4]
        vt[i1, i2, i3, i4] = (alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]) +
            sum(view(recSignal, :, :, i3, i4))

        # fires if membrane potential exceeds threshold
        if vt[i1, i2, i3, i4] > vth[i1, i2, i3, i4]
            zt[i1, i2, i3, i4] = 1
            refractoryCounter[i1, i2, i3, i4] = refractoryDuration[i1, i2, i3, i4]
            firingCounter[i1, i2, i3, i4] += 1
            vt[i1, i2, i3, i4] = vRest[i1, i2, i3, i4]
        else
            zt[i1, i2, i3, i4] = 0
        end

        # phi is evaluated on vt *after* a possible reset (differs from the lif formula)
        phi[i1, i2, i3, i4] = (gammaPd[i1, i2, i3, i4] / vth[i1, i2, i3, i4]) *
            max(0, 1 - ((vt[i1, i2, i3, i4] - vth[i1, i2, i3, i4]) / vth[i1, i2, i3, i4]))
    end

    # epsilonRec is updated identically, and last, in both branches
    epsilonRec[i1, i2, i3, i4] = (alpha[i1, i2, i3, i4] * epsilonRec[i1, i2, i3, i4]) +
        zit[i1, i2, i3, i4]

    return nothing
end
|
||||
|
||||
function alifForward(zit::Array{T},
|
||||
wRec::Array{T},
|
||||
vt0::Array{T},
|
||||
@@ -413,164 +725,6 @@ function alifForward(zit::Array{T},
|
||||
end
|
||||
end
|
||||
|
||||
# gpu launcher
|
||||
"""
    alifForward(alif_zit, alif_wRec, alif_vt, alif_vth, alif_vRest, alif_zt,
                alif_alpha, alif_phi, alif_epsilonRec, alif_refractoryCounter,
                alif_refractoryDuration, alif_gammaPd, alif_firingCounter,
                alif_recSignal, alif_epsilonRecA, alif_a, alif_avth, alif_beta,
                alif_rho)

Host-side launcher for the 20-argument `alifForward` GPU kernel (the nineteen arrays
given here plus `GeneralUtils.linear_to_cartesian`).

The kernel is compiled without being launched, its launch configuration is queried, and
it is then started with `min(1024, config.threads)` threads per block and enough blocks
to provide at least one thread per element of `alif_wRec`. The launch is wrapped in
`CUDA.@sync`, so this function only returns after the kernel has finished.

Returns `nothing` without launching anything when `alif_wRec` is empty; otherwise returns
the value of the synchronized launch expression.
"""
function alifForward(
    alif_zit::CuArray, alif_wRec::CuArray, alif_vt::CuArray, alif_vth::CuArray,
    alif_vRest::CuArray, alif_zt::CuArray, alif_alpha::CuArray, alif_phi::CuArray,
    alif_epsilonRec::CuArray, alif_refractoryCounter::CuArray,
    alif_refractoryDuration::CuArray, alif_gammaPd::CuArray,
    alif_firingCounter::CuArray, alif_recSignal::CuArray,
    alif_epsilonRecA::CuArray, alif_a::CuArray, alif_avth::CuArray,
    alif_beta::CuArray, alif_rho::CuArray,
)
    # Total desired threads: one per element of alif_wRec. With no elements there is no
    # work, and a launch with zero blocks is not a valid CUDA configuration.
    totalThreads = length(alif_wRec)
    totalThreads == 0 && return nothing

    # Compile only; the kernel object is launched further down.
    kernel = @cuda launch=false alifForward(
        alif_zit, alif_wRec, alif_vt, alif_vth, alif_vRest, alif_zt, alif_alpha,
        alif_phi, alif_epsilonRec, alif_refractoryCounter, alif_refractoryDuration,
        alif_gammaPd, alif_firingCounter, alif_recSignal, alif_epsilonRecA, alif_a,
        alif_avth, alif_beta, alif_rho, GeneralUtils.linear_to_cartesian,
    )
    config = launch_configuration(kernel.fun)

    # The exact number of threads cannot be requested, so more threads than needed are
    # launched and the kernel guards against the surplus ones touching memory.
    threads = min(1024, config.threads)
    blocks = cld(totalThreads, threads)

    CUDA.@sync begin
        kernel(
            alif_zit, alif_wRec, alif_vt, alif_vth, alif_vRest, alif_zt, alif_alpha,
            alif_phi, alif_epsilonRec, alif_refractoryCounter, alif_refractoryDuration,
            alif_gammaPd, alif_firingCounter, alif_recSignal, alif_epsilonRecA, alif_a,
            alif_avth, alif_beta, alif_rho,
            GeneralUtils.linear_to_cartesian; threads, blocks)
    end
end
|
||||
|
||||
# gpu kernel
|
||||
"""
    alifForward(zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
                refractoryCounter, refractoryDuration, gammaPd, firingCounter,
                recSignal, epsilonRecA, a, avth, beta, rho, linear_to_cartesian)

GPU kernel (variant without a `subscription` mask): each thread handles the single
element of the 4-index arrays whose linear index equals the global thread index; threads
beyond `length(wRec)` do nothing.

For its element the thread either
- (refractory counter > 0) decrements the counter, zeroes `recSignal`, `zt` and `phi`,
  decays `vt` by `alpha` and `a` by `rho`, then updates `epsilonRec`, `epsilonRecA` and
  the adaptive threshold `avth = vth + beta * a`, or
- (otherwise) stores `zit * wRec` in `recSignal`, sets `vt` to the decayed potential plus
  the sum of `recSignal[:, :, i3, i4]`, refreshes `avth`, emits a spike when `vt`
  exceeds `avth` (`zt = 1`, counter reloaded, `firingCounter` incremented, `vt` reset to
  `vRest`, `a = rho * a + 1`) or just decays `a`, and finally recomputes `phi`,
  `epsilonRec` and `epsilonRecA`.

The refractory counter is decremented only while it is positive, exactly once per call,
so after a spike the element stays refractory for `refractoryDuration` calls and the
counter never drops below zero. `recSignal` is cleared during refractory steps so the
slice sum of neighbouring threads does not pick up a stale contribution.

`linear_to_cartesian(i, dims)` must return (at least) four indices for linear index `i`.

NOTE(review): every thread writes its own `recSignal` entry and then sums the whole
`[:, :, i3, i4]` slice; no synchronization between threads is visible in this kernel —
confirm that reading entries other threads may still be updating is acceptable.

Always returns `nothing`.
"""
function alifForward(
    zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec, refractoryCounter,
    refractoryDuration, gammaPd, firingCounter, recSignal, epsilonRecA, a, avth,
    beta, rho, linear_to_cartesian,
)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global gpu thread index

    # surplus threads (launched only to fill the last block) must not touch memory
    i <= length(wRec) || return nothing

    i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))

    if refractoryCounter[i1, i2, i3, i4] > 0
        # refractory period is active: count down once per call
        refractoryCounter[i1, i2, i3, i4] -= 1
        recSignal[i1, i2, i3, i4] = 0
        zt[i1, i2, i3, i4] = 0
        vt[i1, i2, i3, i4] = alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]
        phi[i1, i2, i3, i4] = 0
        a[i1, i2, i3, i4] = rho[i1, i2, i3, i4] * a[i1, i2, i3, i4]

        # epsilonRec
        epsilonRec[i1, i2, i3, i4] =
            (alpha[i1, i2, i3, i4] * epsilonRec[i1, i2, i3, i4]) + zit[i1, i2, i3, i4]

        # epsilonRecA (phi was just set to 0 above, so only the rho term contributes)
        epsilonRecA[i1, i2, i3, i4] =
            (phi[i1, i2, i3, i4] * epsilonRec[i1, i2, i3, i4]) +
            ((rho[i1, i2, i3, i4] - (phi[i1, i2, i3, i4] * beta[i1, i2, i3, i4])) *
             epsilonRecA[i1, i2, i3, i4])

        # adaptive threshold
        avth[i1, i2, i3, i4] = vth[i1, i2, i3, i4] +
            (beta[i1, i2, i3, i4] * a[i1, i2, i3, i4])
    else
        # refractory period is inactive: integrate the weighted input
        recSignal[i1, i2, i3, i4] = zit[i1, i2, i3, i4] * wRec[i1, i2, i3, i4]
        vt[i1, i2, i3, i4] = (alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]) +
            sum(view(recSignal, :, :, i3, i4))

        # adaptive threshold, computed from `a` before this step's update of `a`
        avth[i1, i2, i3, i4] = vth[i1, i2, i3, i4] +
            (beta[i1, i2, i3, i4] * a[i1, i2, i3, i4])

        # fires if membrane potential exceeds the adaptive threshold
        if vt[i1, i2, i3, i4] > avth[i1, i2, i3, i4]
            zt[i1, i2, i3, i4] = 1
            refractoryCounter[i1, i2, i3, i4] = refractoryDuration[i1, i2, i3, i4]
            firingCounter[i1, i2, i3, i4] += 1
            vt[i1, i2, i3, i4] = vRest[i1, i2, i3, i4]
            a[i1, i2, i3, i4] = (rho[i1, i2, i3, i4] * a[i1, i2, i3, i4]) + 1
        else
            zt[i1, i2, i3, i4] = 0
            a[i1, i2, i3, i4] = (rho[i1, i2, i3, i4] * a[i1, i2, i3, i4])
        end

        # phi uses the fixed vth and vt after a possible reset
        # (differs from the alif formula)
        phi[i1, i2, i3, i4] = (gammaPd[i1, i2, i3, i4] / vth[i1, i2, i3, i4]) *
            max(0, 1 - ((vt[i1, i2, i3, i4] - vth[i1, i2, i3, i4]) / vth[i1, i2, i3, i4]))

        # epsilonRec
        epsilonRec[i1, i2, i3, i4] =
            (alpha[i1, i2, i3, i4] * epsilonRec[i1, i2, i3, i4]) + zit[i1, i2, i3, i4]

        # epsilonRecA
        epsilonRecA[i1, i2, i3, i4] =
            (phi[i1, i2, i3, i4] * epsilonRec[i1, i2, i3, i4]) +
            ((rho[i1, i2, i3, i4] - (phi[i1, i2, i3, i4] * beta[i1, i2, i3, i4])) *
             epsilonRecA[i1, i2, i3, i4])
    end

    return nothing
end
|
||||
|
||||
function onForward(kfn_zit::Array{T},
|
||||
zit::Array{T},
|
||||
wOut::Array{T},
|
||||
@@ -638,133 +792,6 @@ function onForward(kfn_zit::Array{T},
|
||||
end
|
||||
end
|
||||
|
||||
# gpu launcher
|
||||
"""
    onForward(on_zit, on_wOut, on_vt, on_vth, on_vRest, on_zt, on_alpha, on_phi,
              on_epsilonRec, on_refractoryCounter, on_refractoryDuration,
              on_gammaPd, on_firingCounter, on_recSignal)

Host-side launcher for the 15-argument `onForward` GPU kernel (the fourteen arrays given
here plus `GeneralUtils.linear_to_cartesian`).

The kernel is compiled without being launched, its launch configuration is queried, and
it is then started with `min(1024, config.threads)` threads per block and enough blocks
to provide at least one thread per element of `on_wOut`. The launch is wrapped in
`CUDA.@sync`, so this function only returns after the kernel has finished.

Returns `nothing` without launching anything when `on_wOut` is empty; otherwise returns
the value of the synchronized launch expression.
"""
function onForward(
    on_zit::CuArray, on_wOut::CuArray, on_vt::CuArray, on_vth::CuArray,
    on_vRest::CuArray, on_zt::CuArray, on_alpha::CuArray, on_phi::CuArray,
    on_epsilonRec::CuArray, on_refractoryCounter::CuArray,
    on_refractoryDuration::CuArray, on_gammaPd::CuArray,
    on_firingCounter::CuArray, on_recSignal::CuArray,
)
    # Total desired threads: one per element of on_wOut. With no elements there is no
    # work, and a launch with zero blocks is not a valid CUDA configuration.
    totalThreads = length(on_wOut)
    totalThreads == 0 && return nothing

    # Compile only; the kernel object is launched further down.
    kernel = @cuda launch=false onForward(
        on_zit, on_wOut, on_vt, on_vth, on_vRest, on_zt, on_alpha, on_phi,
        on_epsilonRec, on_refractoryCounter, on_refractoryDuration, on_gammaPd,
        on_firingCounter, on_recSignal, GeneralUtils.linear_to_cartesian,
    )
    config = launch_configuration(kernel.fun)

    # The exact number of threads cannot be requested, so more threads than needed are
    # launched and the kernel guards against the surplus ones touching memory.
    threads = min(1024, config.threads)
    blocks = cld(totalThreads, threads)

    CUDA.@sync begin
        kernel(
            on_zit, on_wOut, on_vt, on_vth, on_vRest, on_zt, on_alpha, on_phi,
            on_epsilonRec, on_refractoryCounter, on_refractoryDuration, on_gammaPd,
            on_firingCounter, on_recSignal,
            GeneralUtils.linear_to_cartesian; threads, blocks)
    end
end
|
||||
|
||||
# gpu kernel
|
||||
"""
    onForward(zit, wOut, vt, vth, vRest, zt, alpha, phi, epsilonRec,
              refractoryCounter, refractoryDuration, gammaPd, firingCounter,
              recSignal, linear_to_cartesian)

GPU kernel (variant without a `subscription` mask): each thread handles the single
element of the 4-index arrays whose linear index equals the global thread index; threads
beyond `length(wOut)` do nothing.

For its element the thread either
- (refractory counter > 0) decrements the counter, zeroes `recSignal`, `zt` and `phi`
  and decays `vt` by `alpha`, or
- (otherwise) stores `zit * wOut` in `recSignal`, sets `vt` to the decayed potential plus
  the sum of `recSignal[:, :, i3, i4]`, emits a spike (`zt = 1`, counter reloaded from
  `refractoryDuration`, `firingCounter` incremented, `vt` reset to `vRest`) when `vt`
  exceeds `vth`, and recomputes `phi`.

In both cases `epsilonRec` is updated to `alpha * epsilonRec + zit`.

The refractory counter is decremented only while it is positive, exactly once per call,
so after a spike the element stays refractory for `refractoryDuration` calls and the
counter never drops below zero. `recSignal` is cleared during refractory steps so the
slice sum of neighbouring threads does not pick up a stale contribution.

`linear_to_cartesian(i, dims)` must return (at least) four indices for linear index `i`.

NOTE(review): every thread writes its own `recSignal` entry and then sums the whole
`[:, :, i3, i4]` slice; no synchronization between threads is visible in this kernel —
confirm that reading entries other threads may still be updating is acceptable.

Always returns `nothing`.
"""
function onForward(
    zit, wOut, vt, vth, vRest, zt, alpha, phi, epsilonRec, refractoryCounter,
    refractoryDuration, gammaPd, firingCounter, recSignal, linear_to_cartesian,
)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global gpu thread index

    # surplus threads (launched only to fill the last block) must not touch memory
    i <= length(wOut) || return nothing

    i1, i2, i3, i4 = linear_to_cartesian(i, size(wOut))

    if refractoryCounter[i1, i2, i3, i4] > 0
        # refractory period is active: count down once per call
        refractoryCounter[i1, i2, i3, i4] -= 1
        recSignal[i1, i2, i3, i4] = 0
        zt[i1, i2, i3, i4] = 0
        vt[i1, i2, i3, i4] = alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]
        phi[i1, i2, i3, i4] = 0
    else
        # refractory period is inactive: integrate the weighted input
        recSignal[i1, i2, i3, i4] = zit[i1, i2, i3, i4] * wOut[i1, i2, i3, i4]
        vt[i1, i2, i3, i4] = (alpha[i1, i2, i3, i4] * vt[i1, i2, i3, i4]) +
            sum(view(recSignal, :, :, i3, i4))

        # fires if membrane potential exceeds threshold
        if vt[i1, i2, i3, i4] > vth[i1, i2, i3, i4]
            zt[i1, i2, i3, i4] = 1
            refractoryCounter[i1, i2, i3, i4] = refractoryDuration[i1, i2, i3, i4]
            firingCounter[i1, i2, i3, i4] += 1
            vt[i1, i2, i3, i4] = vRest[i1, i2, i3, i4]
        else
            zt[i1, i2, i3, i4] = 0
        end

        # phi is evaluated on vt *after* a possible reset (differs from the on formula)
        phi[i1, i2, i3, i4] = (gammaPd[i1, i2, i3, i4] / vth[i1, i2, i3, i4]) *
            max(0, 1 - ((vt[i1, i2, i3, i4] - vth[i1, i2, i3, i4]) / vth[i1, i2, i3, i4]))
    end

    # epsilonRec is updated identically, and last, in both branches
    epsilonRec[i1, i2, i3, i4] = (alpha[i1, i2, i3, i4] * epsilonRec[i1, i2, i3, i4]) +
        zit[i1, i2, i3, i4]

    return nothing
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user