This commit is contained in:
ton
2023-08-10 10:06:21 +07:00
parent 65bb97baf3
commit a80e9f2621
3 changed files with 512 additions and 441 deletions

View File

@@ -59,7 +59,9 @@ function (kfn::kfn_1)(input::AbstractArray)
kfn.lif_refractoryDuration, kfn.lif_refractoryDuration,
kfn.lif_gammaPd, kfn.lif_gammaPd,
kfn.lif_firingCounter, kfn.lif_firingCounter,
kfn.lif_recSignal,) kfn.lif_recSignal,
kfn.lif_subscription,
)
end end
@async begin @async begin
# project 3D kfn zit into 4D alif zit # project 3D kfn zit into 4D alif zit
@@ -80,6 +82,7 @@ function (kfn::kfn_1)(input::AbstractArray)
kfn.alif_gammaPd, kfn.alif_gammaPd,
kfn.alif_firingCounter, kfn.alif_firingCounter,
kfn.alif_recSignal, kfn.alif_recSignal,
kfn.alif_subscription,
kfn.alif_epsilonRecA, kfn.alif_epsilonRecA,
kfn.alif_a, kfn.alif_a,
kfn.alif_avth, kfn.alif_avth,
@@ -117,7 +120,9 @@ function (kfn::kfn_1)(input::AbstractArray)
kfn.on_refractoryDuration, kfn.on_refractoryDuration,
kfn.on_gammaPd, kfn.on_gammaPd,
kfn.on_firingCounter, kfn.on_firingCounter,
kfn.on_recSignal,) kfn.on_recSignal,
kfn.on_subscription,
)
logit = reshape(kfn.on_zt, (size(input, 1), :)) logit = reshape(kfn.on_zt, (size(input, 1), :))
@@ -126,6 +131,434 @@ function (kfn::kfn_1)(input::AbstractArray)
kfn.zit kfn.zit
end end
# gpu launcher
# Host-side launcher for the LIF forward step.
# Compiles the `lifForward` kernel method (the one that takes `linear_to_cartesian`
# as its final argument), sizes the grid so there is at least one thread per
# element of `wRec`, launches it, and waits for the device to finish.
function lifForward(
    zit::CuArray, wRec::CuArray, vt::CuArray, vth::CuArray, vRest::CuArray,
    zt::CuArray, alpha::CuArray, phi::CuArray, epsilonRec::CuArray,
    refractoryCounter::CuArray, refractoryDuration::CuArray, gammaPd::CuArray,
    firingCounter::CuArray, recSignal::CuArray, subscription::CuArray,
)
    toCartesian = GeneralUtils.linear_to_cartesian

    # Compile without launching so the launch configuration can be queried first.
    compiled = @cuda launch=false lifForward(
        zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
        refractoryCounter, refractoryDuration, gammaPd, firingCounter,
        recSignal, subscription, toCartesian,
    )

    # Block size: the suggested value, capped at 1024 threads per block.
    suggested = launch_configuration(compiled.fun)
    blockSize = min(1024, suggested.threads)

    # One thread per element of wRec, rounded up to whole blocks; the kernel's own
    # bounds guard keeps the surplus threads away from memory.
    gridSize = cld(length(wRec), blockSize)

    CUDA.@sync begin
        compiled(
            zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
            refractoryCounter, refractoryDuration, gammaPd, firingCounter,
            recSignal, subscription, toCartesian;
            threads=blockSize, blocks=gridSize,
        )
    end
end
# gpu kernel
# Device-side LIF forward step. Each thread handles the single element addressed by
# its global thread index and updates the state arrays in place; returns `nothing`.
# All arrays are indexed with the 4 indices derived from `size(wRec)`, so they are
# presumably the same shape as `wRec` — TODO confirm against the caller.
# `linear_to_cartesian(i, dims)` must return four indices for linear index `i`.
function lifForward( zit,
wRec,
vt,
vth,
vRest,
zt,
alpha,
phi,
epsilonRec,
refractoryCounter,
refractoryDuration,
gammaPd,
firingCounter,
recSignal,
subscription,
linear_to_cartesian,
)
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global (1-based) gpu thread index
# guard: threads beyond the last element of wRec do nothing
if i <= length(wRec)
# cartesian index
i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
# @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
if refractoryCounter[i1,i2,i3,i4] > 0 # refractory period is active
# count down, emit nothing (recSignal, zt, phi zeroed) and only decay vt by alpha
refractoryCounter[i1,i2,i3,i4] -= 1
recSignal[i1,i2,i3,i4] = 0
zt[i1,i2,i3,i4] = 0
vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
phi[i1,i2,i3,i4] = 0
# compute epsilonRec: decay by alpha, add the input gated by the subscription mask
epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) +
(zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
else # refractory period is inactive
# this element's weighted input
recSignal[i1,i2,i3,i4] = wRec[i1,i2,i3,i4] * zit[i1,i2,i3,i4]
# decayed potential plus the sum of the whole recSignal[:,:,i3,i4] slice
# NOTE(review): that slice holds elements written by other threads of this same
# launch and no synchronization is visible here — confirm stale reads are impossible
vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) +
sum(@view(recSignal[:,:,i3,i4]))
# fires if membrane potential exceeds threshold
if vt[i1,i2,i3,i4] > vth[i1,i2,i3,i4]
# spike: reload the refractory counter, count the firing, reset vt to vRest
zt[i1,i2,i3,i4] = 1
refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
firingCounter[i1,i2,i3,i4] += 1
vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
else
zt[i1,i2,i3,i4] = 0
end
# compute phi, there is a difference from lif formula
# (evaluated after the possible reset above, so it sees the post-reset vt)
phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) *
max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))
# compute epsilonRec (same update as in the refractory branch)
epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) +
(zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
end
end
return nothing
end
# gpu launcher
# Host-side launcher for the ALIF forward step.
# Compiles the `alifForward` kernel method (the one that takes `linear_to_cartesian`
# as its final argument), sizes the grid so there is at least one thread per
# element of `wRec`, launches it, and waits for the device to finish.
function alifForward(
    zit::CuArray, wRec::CuArray, vt::CuArray, vth::CuArray, vRest::CuArray,
    zt::CuArray, alpha::CuArray, phi::CuArray, epsilonRec::CuArray,
    refractoryCounter::CuArray, refractoryDuration::CuArray, gammaPd::CuArray,
    firingCounter::CuArray, recSignal::CuArray, subscription::CuArray,
    epsilonRecA::CuArray, a::CuArray, avth::CuArray, beta::CuArray, rho::CuArray,
)
    toCartesian = GeneralUtils.linear_to_cartesian

    # Compile without launching so the launch configuration can be queried first.
    compiled = @cuda launch=false alifForward(
        zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
        refractoryCounter, refractoryDuration, gammaPd, firingCounter,
        recSignal, subscription, epsilonRecA, a, avth, beta, rho, toCartesian,
    )

    # Block size: the suggested value, capped at 1024 threads per block.
    suggested = launch_configuration(compiled.fun)
    blockSize = min(1024, suggested.threads)

    # One thread per element of wRec, rounded up to whole blocks; the kernel's own
    # bounds guard keeps the surplus threads away from memory.
    gridSize = cld(length(wRec), blockSize)

    CUDA.@sync begin
        compiled(
            zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
            refractoryCounter, refractoryDuration, gammaPd, firingCounter,
            recSignal, subscription, epsilonRecA, a, avth, beta, rho, toCartesian;
            threads=blockSize, blocks=gridSize,
        )
    end
end
# gpu kernel
# Device-side ALIF (adaptive-threshold) forward step. Each thread handles the single
# element addressed by its global thread index and updates the state arrays in place;
# returns `nothing`.
# All arrays are indexed with the 4 indices derived from `size(wRec)`, so they are
# presumably the same shape as `wRec` — TODO confirm against the caller.
# `linear_to_cartesian(i, dims)` must return four indices for linear index `i`.
function alifForward( zit,
wRec,
vt,
vth,
vRest,
zt,
alpha,
phi,
epsilonRec,
refractoryCounter,
refractoryDuration,
gammaPd,
firingCounter,
recSignal,
subscription,
epsilonRecA,
a,
avth,
beta,
rho,
linear_to_cartesian,
)
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global (1-based) gpu thread index
# guard: threads beyond the last element of wRec do nothing
if i <= length(wRec)
# cartesian index
i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
# @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
if refractoryCounter[i1,i2,i3,i4] > 0 # refractory period is active
# count down, emit nothing (recSignal, zt, phi zeroed), decay vt by alpha and a by rho
refractoryCounter[i1,i2,i3,i4] -= 1
recSignal[i1,i2,i3,i4] = 0
zt[i1,i2,i3,i4] = 0
vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
phi[i1,i2,i3,i4] = 0
a[i1,i2,i3,i4] = rho[i1,i2,i3,i4] * a[i1,i2,i3,i4]
# compute epsilonRec: decay by alpha, add the input gated by the subscription mask
epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) +
(zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
# compute epsilonRecA
# (phi was just set to 0 above, so for finite values this is rho * epsilonRecA)
epsilonRecA[i1,i2,i3,i4] = (phi[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) +
((rho[i1,i2,i3,i4] - (phi[i1,i2,i3,i4] * beta[i1,i2,i3,i4])) *
epsilonRecA[i1,i2,i3,i4])
# compute avth (here from the already-decayed a)
avth[i1,i2,i3,i4] = vth[i1,i2,i3,i4] + (beta[i1,i2,i3,i4] * a[i1,i2,i3,i4])
else # refractory period is inactive
# this element's weighted input
recSignal[i1,i2,i3,i4] = zit[i1,i2,i3,i4] * wRec[i1,i2,i3,i4]
# decayed potential plus the sum of the whole recSignal[:,:,i3,i4] slice
# NOTE(review): that slice holds elements written by other threads of this same
# launch and no synchronization is visible here — confirm stale reads are impossible
vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) +
sum(@view(recSignal[:,:,i3,i4]))
# compute avth (here from a as it was before this step's update below)
avth[i1,i2,i3,i4] = vth[i1,i2,i3,i4] + (beta[i1,i2,i3,i4] * a[i1,i2,i3,i4])
# fires if membrane potential exceeds the adaptive threshold avth
if vt[i1,i2,i3,i4] > avth[i1,i2,i3,i4]
# spike: reload the refractory counter, count the firing, reset vt to vRest,
# and bump the adaptation variable by 1 on top of its rho decay
zt[i1,i2,i3,i4] = 1
refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
firingCounter[i1,i2,i3,i4] += 1
vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
a[i1,i2,i3,i4] = (rho[i1,i2,i3,i4] * a[i1,i2,i3,i4]) + 1
else
zt[i1,i2,i3,i4] = 0
a[i1,i2,i3,i4] = (rho[i1,i2,i3,i4] * a[i1,i2,i3,i4])
end
# compute phi, there is a difference from alif formula
# (uses the base threshold vth, not avth, and the post-reset vt)
phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) *
max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))
# compute epsilonRec (same update as in the refractory branch)
epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) +
(zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
# compute epsilonRecA use eq.26
# NOTE(review): this expression differs from the one in the refractory branch — confirm intended
epsilonRecA[i1,i2,i3,i4] = (rho[i1,i2,i3,i4] *
(phi[i1,i2,i3,i4] * epsilonRecA[i1,i2,i3,i4])) +
(zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
end
end
return nothing
end
# gpu launcher
# Host-side launcher for the output-neuron forward step.
# Compiles the `onForward` kernel method (the one that takes `linear_to_cartesian`
# as its final argument), sizes the grid so there is at least one thread per
# element of `wOut`, launches it, and waits for the device to finish.
function onForward(
    zit::CuArray, wOut::CuArray, vt::CuArray, vth::CuArray, vRest::CuArray,
    zt::CuArray, alpha::CuArray, phi::CuArray, epsilonRec::CuArray,
    refractoryCounter::CuArray, refractoryDuration::CuArray, gammaPd::CuArray,
    firingCounter::CuArray, recSignal::CuArray, subscription::CuArray,
)
    toCartesian = GeneralUtils.linear_to_cartesian

    # Compile without launching so the launch configuration can be queried first.
    compiled = @cuda launch=false onForward(
        zit, wOut, vt, vth, vRest, zt, alpha, phi, epsilonRec,
        refractoryCounter, refractoryDuration, gammaPd, firingCounter,
        recSignal, subscription, toCartesian,
    )

    # Block size: the suggested value, capped at 1024 threads per block.
    suggested = launch_configuration(compiled.fun)
    blockSize = min(1024, suggested.threads)

    # One thread per element of wOut, rounded up to whole blocks; the kernel's own
    # bounds guard keeps the surplus threads away from memory.
    gridSize = cld(length(wOut), blockSize)

    CUDA.@sync begin
        compiled(
            zit, wOut, vt, vth, vRest, zt, alpha, phi, epsilonRec,
            refractoryCounter, refractoryDuration, gammaPd, firingCounter,
            recSignal, subscription, toCartesian;
            threads=blockSize, blocks=gridSize,
        )
    end
end
# gpu kernel
# Device-side output-neuron forward step. Each thread handles the single element
# addressed by its global thread index and updates the state arrays in place;
# returns `nothing`.
# All arrays are indexed with the 4 indices derived from `size(wOut)`, so they are
# presumably the same shape as `wOut` — TODO confirm against the caller.
# `linear_to_cartesian(i, dims)` must return four indices for linear index `i`.
function onForward( zit,
wOut,
vt,
vth,
vRest,
zt,
alpha,
phi,
epsilonRec,
refractoryCounter,
refractoryDuration,
gammaPd,
firingCounter,
recSignal,
subscription,
linear_to_cartesian,
)
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global (1-based) gpu thread index
# guard: threads beyond the last element of wOut do nothing
if i <= length(wOut)
# cartesian index
i1, i2, i3, i4 = linear_to_cartesian(i, size(wOut))
# @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
if refractoryCounter[i1,i2,i3,i4] > 0 # refractory period is active
# count down, emit nothing (recSignal, zt, phi zeroed) and only decay vt by alpha
refractoryCounter[i1,i2,i3,i4] -= 1
recSignal[i1,i2,i3,i4] = 0
zt[i1,i2,i3,i4] = 0
vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
phi[i1,i2,i3,i4] = 0
# compute epsilonRec: decay by alpha, add the input gated by the subscription mask
epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) +
(zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
else # refractory period is inactive
# this element's weighted input
recSignal[i1,i2,i3,i4] = zit[i1,i2,i3,i4] * wOut[i1,i2,i3,i4]
# decayed potential plus the sum of the whole recSignal[:,:,i3,i4] slice
# NOTE(review): that slice holds elements written by other threads of this same
# launch and no synchronization is visible here — confirm stale reads are impossible
vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) + sum(@view(recSignal[:,:,i3,i4]))
# fires if membrane potential exceeds threshold
if vt[i1,i2,i3,i4] > vth[i1,i2,i3,i4]
# spike: reload the refractory counter, count the firing, reset vt to vRest
zt[i1,i2,i3,i4] = 1
refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
firingCounter[i1,i2,i3,i4] += 1
vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
else
zt[i1,i2,i3,i4] = 0
end
# compute phi, there is a difference from on formula
# (evaluated after the possible reset above, so it sees the post-reset vt)
phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) * max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))
# compute epsilonRec (same update as in the refractory branch)
epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) +
(zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
end
end
return nothing
end
function lifForward(kfn_zit::Array{T}, function lifForward(kfn_zit::Array{T},
zit::Array{T}, zit::Array{T},
wRec::Array{T}, wRec::Array{T},
@@ -193,127 +626,6 @@ function lifForward(kfn_zit::Array{T},
end end
end end
# gpu launcher
# Host-side launcher for the LIF forward step (variant without a subscription mask).
# Compiles the `lifForward` kernel method (the one that takes `linear_to_cartesian`
# as its final argument), sizes the grid so there is at least one thread per
# element of `lif_wRec`, launches it, and waits for the device to finish.
function lifForward(
    lif_zit::CuArray, lif_wRec::CuArray, lif_vt::CuArray, lif_vth::CuArray,
    lif_vRest::CuArray, lif_zt::CuArray, lif_alpha::CuArray, lif_phi::CuArray,
    lif_epsilonRec::CuArray, lif_refractoryCounter::CuArray,
    lif_refractoryDuration::CuArray, lif_gammaPd::CuArray,
    lif_firingCounter::CuArray, lif_recSignal::CuArray,
)
    toCartesian = GeneralUtils.linear_to_cartesian

    # Compile without launching so the launch configuration can be queried first.
    compiled = @cuda launch=false lifForward(
        lif_zit, lif_wRec, lif_vt, lif_vth, lif_vRest, lif_zt, lif_alpha, lif_phi,
        lif_epsilonRec, lif_refractoryCounter, lif_refractoryDuration, lif_gammaPd,
        lif_firingCounter, lif_recSignal, toCartesian,
    )

    # Block size: the suggested value, capped at 1024 threads per block.
    suggested = launch_configuration(compiled.fun)
    blockSize = min(1024, suggested.threads)

    # One thread per element of lif_wRec, rounded up to whole blocks; the kernel's
    # own bounds guard keeps the surplus threads away from memory.
    gridSize = cld(length(lif_wRec), blockSize)

    CUDA.@sync begin
        compiled(
            lif_zit, lif_wRec, lif_vt, lif_vth, lif_vRest, lif_zt, lif_alpha, lif_phi,
            lif_epsilonRec, lif_refractoryCounter, lif_refractoryDuration, lif_gammaPd,
            lif_firingCounter, lif_recSignal, toCartesian;
            threads=blockSize, blocks=gridSize,
        )
    end
end
# gpu kernel
# Device-side LIF forward step (variant without a subscription mask). Each thread
# handles the single element addressed by its global thread index and updates the
# state arrays in place; returns `nothing`.
# All arrays are indexed with the 4 indices derived from `size(wRec)`, so they are
# presumably the same shape as `wRec` — TODO confirm against the caller.
# `linear_to_cartesian(i, dims)` must return four indices for linear index `i`.
function lifForward(
    zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
    refractoryCounter, refractoryDuration, gammaPd, firingCounter, recSignal,
    linear_to_cartesian)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global (1-based) gpu thread index
    # guard: threads beyond the last element of wRec do nothing
    if i <= length(wRec)
        # cartesian index
        i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
        # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")

        if refractoryCounter[i1,i2,i3,i4] > 0 # refractory period is active
            # The counter is decremented once per step and only while positive.
            # Decrementing before the test as well would use two ticks per step
            # and drive idle counters below zero.
            refractoryCounter[i1,i2,i3,i4] -= 1
            zt[i1,i2,i3,i4] = 0
            vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
            phi[i1,i2,i3,i4] = 0

            # compute epsilonRec: decay by alpha, add the input
            epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]
        else # refractory period is inactive
            # this element's weighted input
            recSignal[i1,i2,i3,i4] = zit[i1,i2,i3,i4] * wRec[i1,i2,i3,i4]
            # decayed potential plus the sum of the whole recSignal[:,:,i3,i4] slice
            # NOTE(review): that slice holds elements written by other threads of this
            # same launch and no synchronization is visible here — confirm
            vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) + sum(@view(recSignal[:,:,i3,i4]))

            # fires if membrane potential exceeds threshold
            if vt[i1,i2,i3,i4] > vth[i1,i2,i3,i4]
                # spike: reload the refractory counter, count the firing, reset vt to vRest
                zt[i1,i2,i3,i4] = 1
                refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
                firingCounter[i1,i2,i3,i4] += 1
                vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
            else
                zt[i1,i2,i3,i4] = 0
            end

            # compute phi, there is a difference from lif formula
            # (evaluated after the possible reset above, so it sees the post-reset vt)
            phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) * max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))

            # compute epsilonRec (same update as in the refractory branch)
            epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]
        end
    end
    return nothing
end
function alifForward(zit::Array{T}, function alifForward(zit::Array{T},
wRec::Array{T}, wRec::Array{T},
vt0::Array{T}, vt0::Array{T},
@@ -413,164 +725,6 @@ function alifForward(zit::Array{T},
end end
end end
# gpu launcher
# Host-side launcher for the ALIF forward step (variant without a subscription mask).
# Compiles the `alifForward` kernel method (the one that takes `linear_to_cartesian`
# as its final argument), sizes the grid so there is at least one thread per
# element of `alif_wRec`, launches it, and waits for the device to finish.
function alifForward(
    alif_zit::CuArray, alif_wRec::CuArray, alif_vt::CuArray, alif_vth::CuArray,
    alif_vRest::CuArray, alif_zt::CuArray, alif_alpha::CuArray, alif_phi::CuArray,
    alif_epsilonRec::CuArray, alif_refractoryCounter::CuArray,
    alif_refractoryDuration::CuArray, alif_gammaPd::CuArray,
    alif_firingCounter::CuArray, alif_recSignal::CuArray,
    alif_epsilonRecA::CuArray, alif_a::CuArray, alif_avth::CuArray,
    alif_beta::CuArray, alif_rho::CuArray,
)
    toCartesian = GeneralUtils.linear_to_cartesian

    # Compile without launching so the launch configuration can be queried first.
    compiled = @cuda launch=false alifForward(
        alif_zit, alif_wRec, alif_vt, alif_vth, alif_vRest, alif_zt, alif_alpha,
        alif_phi, alif_epsilonRec, alif_refractoryCounter, alif_refractoryDuration,
        alif_gammaPd, alif_firingCounter, alif_recSignal, alif_epsilonRecA,
        alif_a, alif_avth, alif_beta, alif_rho, toCartesian,
    )

    # Block size: the suggested value, capped at 1024 threads per block.
    suggested = launch_configuration(compiled.fun)
    blockSize = min(1024, suggested.threads)

    # One thread per element of alif_wRec, rounded up to whole blocks; the kernel's
    # own bounds guard keeps the surplus threads away from memory.
    gridSize = cld(length(alif_wRec), blockSize)

    CUDA.@sync begin
        compiled(
            alif_zit, alif_wRec, alif_vt, alif_vth, alif_vRest, alif_zt, alif_alpha,
            alif_phi, alif_epsilonRec, alif_refractoryCounter, alif_refractoryDuration,
            alif_gammaPd, alif_firingCounter, alif_recSignal, alif_epsilonRecA,
            alif_a, alif_avth, alif_beta, alif_rho, toCartesian;
            threads=blockSize, blocks=gridSize,
        )
    end
end
# gpu kernel
# Device-side ALIF (adaptive-threshold) forward step (variant without a subscription
# mask). Each thread handles the single element addressed by its global thread index
# and updates the state arrays in place; returns `nothing`.
# All arrays are indexed with the 4 indices derived from `size(wRec)`, so they are
# presumably the same shape as `wRec` — TODO confirm against the caller.
# `linear_to_cartesian(i, dims)` must return four indices for linear index `i`.
function alifForward(
    zit, wRec, vt, vth, vRest, zt, alpha, phi, epsilonRec,
    refractoryCounter, refractoryDuration, gammaPd, firingCounter, recSignal,
    epsilonRecA, a, avth, beta, rho,
    linear_to_cartesian)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global (1-based) gpu thread index
    # guard: threads beyond the last element of wRec do nothing
    if i <= length(wRec)
        # cartesian index
        i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
        # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")

        if refractoryCounter[i1,i2,i3,i4] > 0 # refractory period is active
            # The counter is decremented once per step and only while positive.
            # Decrementing before the test as well would use two ticks per step
            # and drive idle counters below zero.
            refractoryCounter[i1,i2,i3,i4] -= 1
            zt[i1,i2,i3,i4] = 0
            vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
            phi[i1,i2,i3,i4] = 0
            a[i1,i2,i3,i4] = rho[i1,i2,i3,i4] * a[i1,i2,i3,i4]

            # compute epsilonRec: decay by alpha, add the input
            epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]

            # compute epsilonRecA
            # (phi was just set to 0 above, so for finite values this is rho * epsilonRecA)
            epsilonRecA[i1,i2,i3,i4] = (phi[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) +
                ((rho[i1,i2,i3,i4] - (phi[i1,i2,i3,i4] * beta[i1,i2,i3,i4])) * epsilonRecA[i1,i2,i3,i4])

            # compute avth (here from the already-decayed a)
            avth[i1,i2,i3,i4] = vth[i1,i2,i3,i4] + (beta[i1,i2,i3,i4] * a[i1,i2,i3,i4])
        else # refractory period is inactive
            # this element's weighted input
            recSignal[i1,i2,i3,i4] = zit[i1,i2,i3,i4] * wRec[i1,i2,i3,i4]
            # decayed potential plus the sum of the whole recSignal[:,:,i3,i4] slice
            # NOTE(review): that slice holds elements written by other threads of this
            # same launch and no synchronization is visible here — confirm
            vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) + sum(@view(recSignal[:,:,i3,i4]))

            # compute avth (here from a as it was before this step's update below)
            avth[i1,i2,i3,i4] = vth[i1,i2,i3,i4] + (beta[i1,i2,i3,i4] * a[i1,i2,i3,i4])

            # fires if membrane potential exceeds the adaptive threshold avth
            if vt[i1,i2,i3,i4] > avth[i1,i2,i3,i4]
                # spike: reload the refractory counter, count the firing, reset vt to
                # vRest, and bump the adaptation variable by 1 on top of its rho decay
                zt[i1,i2,i3,i4] = 1
                refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
                firingCounter[i1,i2,i3,i4] += 1
                vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
                a[i1,i2,i3,i4] = (rho[i1,i2,i3,i4] * a[i1,i2,i3,i4]) + 1
            else
                zt[i1,i2,i3,i4] = 0
                a[i1,i2,i3,i4] = (rho[i1,i2,i3,i4] * a[i1,i2,i3,i4])
            end

            # compute phi, there is a difference from alif formula
            # (uses the base threshold vth, not avth, and the post-reset vt)
            phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) * max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))

            # compute epsilonRec (same update as in the refractory branch)
            epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]

            # compute epsilonRecA (same expression as in the refractory branch, with the phi computed above)
            epsilonRecA[i1,i2,i3,i4] = (phi[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) +
                ((rho[i1,i2,i3,i4] - (phi[i1,i2,i3,i4] * beta[i1,i2,i3,i4])) * epsilonRecA[i1,i2,i3,i4])
        end
    end
    return nothing
end
function onForward(kfn_zit::Array{T}, function onForward(kfn_zit::Array{T},
zit::Array{T}, zit::Array{T},
wOut::Array{T}, wOut::Array{T},
@@ -638,133 +792,6 @@ function onForward(kfn_zit::Array{T},
end end
end end
# gpu launcher
# Host-side launcher for the output-neuron forward step (variant without a
# subscription mask).
# Compiles the `onForward` kernel method (the one that takes `linear_to_cartesian`
# as its final argument), sizes the grid so there is at least one thread per
# element of `on_wOut`, launches it, and waits for the device to finish.
function onForward(
    on_zit::CuArray, on_wOut::CuArray, on_vt::CuArray, on_vth::CuArray,
    on_vRest::CuArray, on_zt::CuArray, on_alpha::CuArray, on_phi::CuArray,
    on_epsilonRec::CuArray, on_refractoryCounter::CuArray,
    on_refractoryDuration::CuArray, on_gammaPd::CuArray,
    on_firingCounter::CuArray, on_recSignal::CuArray)
    toCartesian = GeneralUtils.linear_to_cartesian

    # Compile without launching so the launch configuration can be queried first.
    compiled = @cuda launch=false onForward(
        on_zit, on_wOut, on_vt, on_vth, on_vRest, on_zt, on_alpha, on_phi,
        on_epsilonRec, on_refractoryCounter, on_refractoryDuration, on_gammaPd,
        on_firingCounter, on_recSignal, toCartesian,
    )

    # Block size: the suggested value, capped at 1024 threads per block.
    suggested = launch_configuration(compiled.fun)
    blockSize = min(1024, suggested.threads)

    # One thread per element of on_wOut, rounded up to whole blocks; the kernel's
    # own bounds guard keeps the surplus threads away from memory.
    gridSize = cld(length(on_wOut), blockSize)

    CUDA.@sync begin
        compiled(
            on_zit, on_wOut, on_vt, on_vth, on_vRest, on_zt, on_alpha, on_phi,
            on_epsilonRec, on_refractoryCounter, on_refractoryDuration, on_gammaPd,
            on_firingCounter, on_recSignal, toCartesian;
            threads=blockSize, blocks=gridSize,
        )
    end
end
# gpu kernel
# Device-side output-neuron forward step (variant without a subscription mask). Each
# thread handles the single element addressed by its global thread index and updates
# the state arrays in place; returns `nothing`.
# All arrays are indexed with the 4 indices derived from `size(wOut)`, so they are
# presumably the same shape as `wOut` — TODO confirm against the caller.
# `linear_to_cartesian(i, dims)` must return four indices for linear index `i`.
function onForward(
    zit, wOut, vt, vth, vRest, zt, alpha, phi, epsilonRec,
    refractoryCounter, refractoryDuration, gammaPd, firingCounter, recSignal,
    linear_to_cartesian)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global (1-based) gpu thread index
    # guard: threads beyond the last element of wOut do nothing
    if i <= length(wOut)
        # cartesian index
        i1, i2, i3, i4 = linear_to_cartesian(i, size(wOut))
        # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")

        if refractoryCounter[i1,i2,i3,i4] > 0 # refractory period is active
            # The counter is decremented once per step and only while positive.
            # Decrementing before the test as well would use two ticks per step
            # and drive idle counters below zero.
            refractoryCounter[i1,i2,i3,i4] -= 1
            zt[i1,i2,i3,i4] = 0
            vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
            phi[i1,i2,i3,i4] = 0

            # compute epsilonRec: decay by alpha, add the input
            epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]
        else # refractory period is inactive
            # this element's weighted input
            recSignal[i1,i2,i3,i4] = zit[i1,i2,i3,i4] * wOut[i1,i2,i3,i4]
            # decayed potential plus the sum of the whole recSignal[:,:,i3,i4] slice
            # NOTE(review): that slice holds elements written by other threads of this
            # same launch and no synchronization is visible here — confirm
            vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) + sum(@view(recSignal[:,:,i3,i4]))

            # fires if membrane potential exceeds threshold
            if vt[i1,i2,i3,i4] > vth[i1,i2,i3,i4]
                # spike: reload the refractory counter, count the firing, reset vt to vRest
                zt[i1,i2,i3,i4] = 1
                refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
                firingCounter[i1,i2,i3,i4] += 1
                vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
            else
                zt[i1,i2,i3,i4] = 0
            end

            # compute phi, there is a difference from on formula
            # (evaluated after the possible reset above, so it sees the post-reset vt)
            phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) * max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))

            # compute epsilonRec (same update as in the refractory branch)
            epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]
        end
    end
    return nothing
end

View File

@@ -9,6 +9,7 @@ using ..type, ..snnUtil
#------------------------------------------------------------------------------------------------100 #------------------------------------------------------------------------------------------------100
function compute_paramsChange!(kfn::kfn_1, modelError, outputError) function compute_paramsChange!(kfn::kfn_1, modelError, outputError)
modelError = reshape(modelError, (1,1,1,:)) # (1,1,1,batch)
lifComputeParamsChange!(kfn.lif_phi, lifComputeParamsChange!(kfn.lif_phi,
kfn.lif_epsilonRec, kfn.lif_epsilonRec,
kfn.lif_eta, kfn.lif_eta,
@@ -18,7 +19,10 @@ function compute_paramsChange!(kfn::kfn_1, modelError, outputError)
kfn.on_wOut, kfn.on_wOut,
kfn.lif_arrayProjection4d, kfn.lif_arrayProjection4d,
kfn.lif_error, kfn.lif_error,
modelError) modelError,
kfn.inputSize,
)
alifComputeParamsChange!(kfn.alif_phi, alifComputeParamsChange!(kfn.alif_phi,
kfn.alif_epsilonRec, kfn.alif_epsilonRec,
@@ -30,7 +34,10 @@ function compute_paramsChange!(kfn::kfn_1, modelError, outputError)
kfn.alif_arrayProjection4d, kfn.alif_arrayProjection4d,
kfn.alif_error, kfn.alif_error,
modelError, modelError,
kfn.alif_beta)
kfn.alif_epsilonRecA,
kfn.alif_beta,
)
onComputeParamsChange!(kfn.on_phi, onComputeParamsChange!(kfn.on_phi,
kfn.on_epsilonRec, kfn.on_epsilonRec,
@@ -38,7 +45,10 @@ function compute_paramsChange!(kfn::kfn_1, modelError, outputError)
kfn.on_eRec, kfn.on_eRec,
kfn.on_wOut, kfn.on_wOut,
kfn.on_wOutChange, kfn.on_wOutChange,
outputError) kfn.on_arrayProjection4d,
kfn.on_error,
outputError,
)
# error("DEBUG -> kfn compute_paramsChange! $(Dates.now())") # error("DEBUG -> kfn compute_paramsChange! $(Dates.now())")
end end
@@ -51,18 +61,28 @@ function lifComputeParamsChange!( phi::CuArray,
wOut::CuArray, wOut::CuArray,
arrayProjection4d::CuArray, arrayProjection4d::CuArray,
nError::CuArray, nError::CuArray,
modelError::CuArray) modelError::CuArray,
# Bₖⱼ in paper, sum() to get each neuron's total wOut weight inputSize::CuArray,
wOutSum = sum(wOut, dims=3) .* arrayProjection4d )
# Bₖⱼ in paper, sum() to get each neuron's total wOut weight,
# use absolute because only magnitude is needed
wOutSum_all = reshape( abs.(sum(wOut, dims=3)), (1,1,:, size(wOut, 4)) ) # (1,1,allNeuron,batch)
# get only each lif neuron's wOut, leaving out other neuron's wOut
startIndex = prod(inputSize) +1
stopIndex = startIndex + size(wRec, 3) -1
wOutSum = @view(wOutSum_all[1,1, startIndex:stopIndex, :])
wOutSum = reshape(wOutSum, (1, 1, size(wOutSum, 1), size(wOutSum, 2))) # (1,1,n,batch)
# nError a.k.a. learning signal use dopamine concept, # nError a.k.a. learning signal use dopamine concept,
# this neuron receive summed error signal (modelError) # this neuron receive summed error signal (modelError)
nError .= (modelError .* arrayProjection4d) .* wOutSum nError .= (modelError .* wOutSum) .* arrayProjection4d
eRec .= phi .* epsilonRec eRec .= phi .* epsilonRec
# GeneralUtils.isNotEqual(wRec, 0) is a subscribe filter use to filter out non-subscribed wRecChange wRecChange .+= ((-1 .* eta) .* nError .* eRec)
wRecChange .+= ((-1 .* eta) .* nError .* eRec .* sign.(wRec)) .* GeneralUtils.isNotEqual.(wRec, 0)
# error("DEBUG -> lifComputeParamsChange! $(Dates.now())") # reset epsilonRec
epsilonRec .= 0
end end
function alifComputeParamsChange!( phi::CuArray, function alifComputeParamsChange!( phi::CuArray,
@@ -75,18 +95,29 @@ function alifComputeParamsChange!( phi::CuArray,
arrayProjection4d::CuArray, arrayProjection4d::CuArray,
nError::CuArray, nError::CuArray,
modelError::CuArray, modelError::CuArray,
beta::CuArray)
# Bₖⱼ in paper, sum() to get each neuron's total wOut weight epsilonRecA::CuArray,
wOutSum = sum(wOut, dims=3) .* arrayProjection4d beta::CuArray
)
# Bₖⱼ in paper, sum() to get each neuron's total wOut weight,
# use absolute because only magnitude is needed
wOutSum_all = reshape( abs.(sum(wOut, dims=3)), (1,1,:, size(wOut, 4)) ) # (1,1,allNeuron,batch)
# get only each lif neuron's wOut, leaving out other neuron's wOut
wOutSum = @view(wOutSum_all[1,1, end-size(wRec, 3)+1:end, :])
wOutSum = reshape(wOutSum, (1, 1, size(wOutSum, 1), size(wOutSum, 2))) # (1,1,n,batch)
# nError a.k.a. learning signal use dopamine concept, # nError a.k.a. learning signal use dopamine concept,
# this neuron receive summed error signal (modelError) # this neuron receive summed error signal (modelError)
nError .= (modelError .* arrayProjection4d) .* wOutSum nError .= (modelError .* wOutSum) .* arrayProjection4d
eRec .= (phi .* epsilonRec) .+ (phi .* epsilonRec .* beta) eRec .= phi .* (epsilonRec .- (beta .* epsilonRecA)) # use eq. 25
wRecChange .+= ((-1 .* eta) .* nError .* eRec)
# reset epsilonRec
epsilonRec .= 0
epsilonRecA .= 0
# GeneralUtils.isNotEqual(wRec, 0) is a subscribe filter use to filter out non-subscribed wRecChange
wRecChange .+= ((-1 .* eta) .* nError .* eRec .* sign.(wRec)) .* GeneralUtils.isNotEqual.(wRec, 0)
# error("DEBUG -> alifComputeParamsChange! $(Dates.now())") # error("DEBUG -> alifComputeParamsChange! $(Dates.now())")
end end
@@ -96,15 +127,17 @@ function onComputeParamsChange!(phi::CuArray,
eRec::CuArray, eRec::CuArray,
wOut::CuArray, wOut::CuArray,
wOutChange::CuArray, wOutChange::CuArray,
arrayProjection4d::CuArray,
nError::CuArray,
outputError::CuArray # outputError is output neuron's error outputError::CuArray # outputError is output neuron's error
) )
# nError a.k.a. learning signal use dopamine concept, eRec .= phi .* epsilonRec
# this neuron receive summed error signal (modelError) nError .= reshape(outputError, (1, 1, :, size(outputError, 2))) .* arrayProjection4d
eRec .= (phi .* epsilonRec) .* reshape(outputError, (1, 1, :, size(epsilonRec, 4))) wOutChange .+= ((-1 .* eta) .* nError .* eRec)
# GeneralUtils.isNotEqual(wRec, 0) is a subscribe filter use to filter out non-subscribed wRecChange # reset epsilonRec
wOutChange .+= ((-1 .* eta) .* eRec .* sign.(wOut)) .* GeneralUtils.isNotEqual.(wOut, 0) epsilonRec .= 0
# error("DEBUG -> onComputeParamsChange! $(Dates.now())") # error("DEBUG -> onComputeParamsChange! $(Dates.now())")
end end
@@ -224,20 +257,20 @@ end
function lifLearn!(wRec, function lifLearn!(wRec,
wRecChange, wRecChange,
arrayProjection4d) arrayProjection4d)
# merge learning weight with average learning weight # merge learning weight with average learning weight
wRec .+= (sum(wRecChange) ./ (size(wRec, 4))) .* arrayProjection4d wRec .+= (sum(wRecChange, dims=4) ./ (size(wRec, 4))) .* arrayProjection4d
#TODO synaptic strength #TODO synaptic strength
#TODO neuroplasticity #TODO neuroplasticity
# error("DEBUG -> lifLearn! $(Dates.now())")
end end
function alifLearn!(wRec, function alifLearn!(wRec,
wRecChange, wRecChange,
arrayProjection4d) arrayProjection4d)
# merge learning weight # merge learning weight with average learning weight
wRec .+= (sum(wRecChange) ./ (size(wRec, 4))) .* arrayProjection4d wRec .+= (sum(wRecChange) ./ (size(wRec, 4))) .* arrayProjection4d
#TODO synaptic strength #TODO synaptic strength
@@ -249,7 +282,7 @@ end
function onLearn!(wOut, function onLearn!(wOut,
wOutChange, wOutChange,
arrayProjection4d) arrayProjection4d)
# merge learning weight # merge learning weight with average learning weight
wOut .+= (sum(wOutChange) ./ (size(wOut, 4))) .* arrayProjection4d wOut .+= (sum(wOutChange) ./ (size(wOut, 4))) .* arrayProjection4d
# adaptive wOut to help convergence using c_decay # adaptive wOut to help convergence using c_decay

View File

@@ -21,6 +21,7 @@ Base.@kwdef mutable struct kfn_1 <: knowledgeFn
timeStep::Union{AbstractArray, Nothing} = nothing timeStep::Union{AbstractArray, Nothing} = nothing
learningStage::Union{AbstractArray, Nothing} = nothing # 0 inference, 1 start, 2 during, 3 end learning learningStage::Union{AbstractArray, Nothing} = nothing # 0 inference, 1 start, 2 during, 3 end learning
inputSize::Union{AbstractArray, Nothing} = nothing
zit::Union{AbstractArray, Nothing} = nothing # 3D activation matrix zit::Union{AbstractArray, Nothing} = nothing # 3D activation matrix
modelError::Union{AbstractArray, Nothing} = nothing # store RSNN error modelError::Union{AbstractArray, Nothing} = nothing # store RSNN error
outputError::Union{AbstractArray, Nothing} = nothing # store output neurons error outputError::Union{AbstractArray, Nothing} = nothing # store output neurons error
@@ -50,6 +51,7 @@ Base.@kwdef mutable struct kfn_1 <: knowledgeFn
lif_gammaPd::Union{AbstractArray, Nothing} = nothing lif_gammaPd::Union{AbstractArray, Nothing} = nothing
lif_wRecChange::Union{AbstractArray, Nothing} = nothing lif_wRecChange::Union{AbstractArray, Nothing} = nothing
lif_error::Union{AbstractArray, Nothing} = nothing lif_error::Union{AbstractArray, Nothing} = nothing
lif_subscription::Union{AbstractArray, Nothing} = nothing
lif_firingCounter::Union{AbstractArray, Nothing} = nothing lif_firingCounter::Union{AbstractArray, Nothing} = nothing
@@ -85,6 +87,7 @@ Base.@kwdef mutable struct kfn_1 <: knowledgeFn
alif_gammaPd::Union{AbstractArray, Nothing} = nothing alif_gammaPd::Union{AbstractArray, Nothing} = nothing
alif_wRecChange::Union{AbstractArray, Nothing} = nothing alif_wRecChange::Union{AbstractArray, Nothing} = nothing
alif_error::Union{AbstractArray, Nothing} = nothing alif_error::Union{AbstractArray, Nothing} = nothing
alif_subscription::Union{AbstractArray, Nothing} = nothing
alif_firingCounter::Union{AbstractArray, Nothing} = nothing alif_firingCounter::Union{AbstractArray, Nothing} = nothing
@@ -137,6 +140,7 @@ Base.@kwdef mutable struct kfn_1 <: knowledgeFn
on_gammaPd::Union{AbstractArray, Nothing} = nothing on_gammaPd::Union{AbstractArray, Nothing} = nothing
on_wOutChange::Union{AbstractArray, Nothing} = nothing on_wOutChange::Union{AbstractArray, Nothing} = nothing
on_error::Union{AbstractArray, Nothing} = nothing on_error::Union{AbstractArray, Nothing} = nothing
on_subscription::Union{AbstractArray, Nothing} = nothing
on_firingCounter::Union{AbstractArray, Nothing} = nothing on_firingCounter::Union{AbstractArray, Nothing} = nothing
@@ -162,8 +166,8 @@ function kfn_1(params::Dict; device=cpu)
# ---------------------------------------------------------------------------- # # ---------------------------------------------------------------------------- #
# row*col is a 2D matrix represent all RSNN activation # row*col is a 2D matrix represent all RSNN activation
row, col, batch = kfn.params[:inputPort][:signal][:numbers] # z-axis represent signal batch number row, col, batch = kfn.params[:inputPort][:signal][:numbers] # z-axis represent signal batch number
# row += kfn.params[:inputPort][:noise][:numbers][1]
col += kfn.params[:inputPort][:noise][:numbers][2] col += kfn.params[:inputPort][:noise][:numbers][2]
kfn.inputSize = [row, col] |> device
col += kfn.params[:computeNeuron][:lif][:numbers][2] col += kfn.params[:computeNeuron][:lif][:numbers][2]
col += kfn.params[:computeNeuron][:alif][:numbers][2] col += kfn.params[:computeNeuron][:alif][:numbers][2]
@@ -208,6 +212,7 @@ function kfn_1(params::Dict; device=cpu)
kfn.lif_gammaPd = (similar(kfn.lif_wRec) .= 0.3) |> device kfn.lif_gammaPd = (similar(kfn.lif_wRec) .= 0.3) |> device
kfn.lif_wRecChange = (similar(kfn.lif_wRec) .= 0) |> device kfn.lif_wRecChange = (similar(kfn.lif_wRec) .= 0) |> device
kfn.lif_error = (similar(kfn.lif_wRec) .= 0) |> device kfn.lif_error = (similar(kfn.lif_wRec) .= 0) |> device
kfn.lif_subscription = (GeneralUtils.isNotEqual.(kfn.lif_wRec, 0)) |> device
kfn.lif_firingCounter = (similar(kfn.lif_wRec) .= 0) |> device kfn.lif_firingCounter = (similar(kfn.lif_wRec) .= 0) |> device
@@ -254,6 +259,7 @@ function kfn_1(params::Dict; device=cpu)
kfn.alif_gammaPd = (similar(kfn.alif_wRec) .= 0.3) |> device kfn.alif_gammaPd = (similar(kfn.alif_wRec) .= 0.3) |> device
kfn.alif_wRecChange = (similar(kfn.alif_wRec) .= 0) |> device kfn.alif_wRecChange = (similar(kfn.alif_wRec) .= 0) |> device
kfn.alif_error = (similar(kfn.alif_wRec) .= 0) |> device kfn.alif_error = (similar(kfn.alif_wRec) .= 0) |> device
kfn.alif_subscription = (GeneralUtils.isNotEqual.(kfn.alif_wRec, 0)) |> device
kfn.alif_firingCounter = (similar(kfn.alif_wRec) .= 0) |> device kfn.alif_firingCounter = (similar(kfn.alif_wRec) .= 0) |> device
@@ -286,9 +292,13 @@ function kfn_1(params::Dict; device=cpu)
# subscription # subscription
w = zeros(row, col, n) w = zeros(row, col, n)
synapticConnectionPercent = kfn.params[:outputPort][:params][:synapticConnectionPercent] synapticConnectionPercent = kfn.params[:outputPort][:params][:synapticConnectionPercent]
synapticConnection = Int(floor(row*col * synapticConnectionPercent/100)) subable = size(kfn.lif_wRec, 3) + size(kfn.alif_wRec, 3) # sub to lif, alif only
for slice in eachslice(w, dims=3) synapticConnection = Int(floor(subable * synapticConnectionPercent/100))
pool = shuffle!([1:row*col...])[1:synapticConnection] for slice in eachslice(w, dims=3) # each slice is a neuron
startInd = row*col - subable + 1 # e.g. 100(row*col) - 50(subable) = 50 -> startInd = 51
# pool must contain only lif, alif neurons
pool = shuffle!([startInd:row*col...])[1:synapticConnection]
for i in pool for i in pool
slice[i] = randn()/10 # assign weight to synaptic connection slice[i] = randn()/10 # assign weight to synaptic connection
end end
@@ -313,6 +323,7 @@ function kfn_1(params::Dict; device=cpu)
kfn.on_gammaPd = (similar(kfn.on_wOut) .= 0.3) |> device kfn.on_gammaPd = (similar(kfn.on_wOut) .= 0.3) |> device
kfn.on_wOutChange = (similar(kfn.on_wOut) .= 0) |> device kfn.on_wOutChange = (similar(kfn.on_wOut) .= 0) |> device
kfn.on_error = (similar(kfn.on_wOut) .= 0) |> device kfn.on_error = (similar(kfn.on_wOut) .= 0) |> device
kfn.on_subscription = (GeneralUtils.isNotEqual.(kfn.on_wOut, 0)) |> device
kfn.on_firingCounter = (similar(kfn.on_wOut) .= 0) |> device kfn.on_firingCounter = (similar(kfn.on_wOut) .= 0) |> device