ComputeParamsChange()

This commit is contained in:
ton
2023-08-05 14:54:52 +07:00
parent 28f9fb4bdc
commit 9ff7efc7dc
3 changed files with 685 additions and 300 deletions

View File

@@ -22,7 +22,6 @@ function (kfn::kfn_1)(input::AbstractArray)
end
# println(">>> input ", size(input))
# println(">>> zit ", size(kfn.zit))
# println(">>> lif_zit ", size(kfn.lif_zit))
# println(">>> lif_recSignal ", size(kfn.lif_recSignal))
# println(">>> lif_wRec ", size(kfn.lif_wRec))
@@ -31,17 +30,29 @@ function (kfn::kfn_1)(input::AbstractArray)
# println(">>> lif_vt0 ", size(kfn.lif_vt0))
# println(">>> lif_vt0 sum ", sum(kfn.lif_vt0))
# pass input_data into input neuron.
GeneralUtils.cartesianAssign!(kfn.zit, input)
# update activation matrix with "lif_zt1" and "alif_zt1" by concatenating
# (input, lif_zt1, alif_zt1) to form activation matrix
_zit = cat(reshape(input, (size(input, 1), size(input, 2), 1, size(input, 3))),
reshape(kfn.lif_zt, (size(input, 1), :, 1, size(input, 3))),
reshape(kfn.alif_zt, (size(input, 1), :, 1, size(input, 3))), dims=2)
kfn.zit .= reshape(_zit, (size(input, 1), :, size(input, 3)))
lifForward( kfn.zit,
kfn.lif_zit,
# pass input_data into input neuron.
# GeneralUtils.cartesianAssign!(kfn.zit, input)
# kfn.zit = kfn.zit |> device
# input = input |> device
# project 3D kfn zit into 4D lif zit
i1, i2, i3, i4 = size(kfn.lif_zit)
kfn.lif_zit .= reshape(kfn.zit, (i1, i2, 1, i4)) .* kfn.lif_arrayProjection4d
lifForward( kfn.lif_zit,
kfn.lif_wRec,
kfn.lif_vt0,
kfn.lif_vt1,
kfn.lif_vt,
kfn.lif_vth,
kfn.lif_vRest,
kfn.lif_zt1,
kfn.lif_zt4d,
kfn.lif_alpha,
kfn.lif_phi,
kfn.lif_epsilonRec,
@@ -49,23 +60,18 @@ function (kfn::kfn_1)(input::AbstractArray)
kfn.lif_refractoryDuration,
kfn.lif_gammaPd,
kfn.lif_firingCounter,
kfn.lif_arrayProjection3DTo4D,
kfn.lif_recSignal,
kfn.lif_decayed_vt0,
kfn.lif_decayed_epsilonRec,
kfn.lif_vt1_diff_vth,
kfn.lif_vt1_diff_vth_div_vth,
kfn.lif_gammaPd_div_vth,
kfn.lif_phiActivation)
kfn.lif_recSignal,)
# project 3D kfn zit into 4D alif zit
i1, i2, i3, i4 = size(kfn.alif_zit)
kfn.alif_zit .= reshape(kfn.zit, (i1, i2, 1, i4)) .* kfn.alif_arrayProjection4d
alifForward( kfn.zit,
kfn.alif_zit,
alifForward(kfn.alif_zit,
kfn.alif_wRec,
kfn.alif_vt0,
kfn.alif_vt1,
kfn.alif_vt,
kfn.alif_vth,
kfn.alif_vRest,
kfn.alif_zt1,
kfn.alif_zt4d,
kfn.alif_alpha,
kfn.alif_phi,
kfn.alif_epsilonRec,
@@ -73,44 +79,35 @@ function (kfn::kfn_1)(input::AbstractArray)
kfn.alif_refractoryDuration,
kfn.alif_gammaPd,
kfn.alif_firingCounter,
kfn.alif_arrayProjection3DTo4D,
kfn.alif_recSignal,
kfn.alif_decayed_vt0,
kfn.alif_decayed_epsilonRec,
kfn.alif_vt1_diff_vth,
kfn.alif_vt1_diff_vth_div_vth,
kfn.alif_gammaPd_div_vth,
kfn.alif_phiActivation,
kfn.alif_epsilonRecA,
kfn.alif_avth,
kfn.alif_a,
kfn.alif_avth,
kfn.alif_beta,
kfn.alif_rho,
kfn.alif_phi_x_epsilonRec,
kfn.alif_phi_x_beta,
kfn.alif_rho_diff_phi_x_beta,
kfn.alif_rho_div_phi_x_beta_x_epsilonRecA,
kfn.alif_beta_x_a)
# error("DEBUG -> kfn forward")
kfn.alif_rho,)
# reduce lif_zt4d and alif_zt4d into lif_zt, alif_zt (4d -> 1d)
kfn.lif_zt .= reduce(max, kfn.lif_zt4d, dims=(1,2))
kfn.alif_zt .= reduce(max, kfn.alif_zt4d, dims=(1,2))
# update activation matrix by concatenate (input, lif_zt1, alif_zt1) to form activation matrix
# update activation matrix with "lif_zt1" and "alif_zt1" by concatenating
# (input, lif_zt1, alif_zt1) to form activation matrix
_zit = cat(reshape(input, (size(input, 1), size(input, 2), 1, size(input, 3))),
reshape(kfn.lif_zt1, (size(input, 1), :, 1, size(input, 3))),
reshape(kfn.alif_zt1, (size(input, 1), :, 1, size(input, 3))), dims=2)
reshape(kfn.lif_zt, (size(input, 1), :, 1, size(input, 3))),
reshape(kfn.alif_zt, (size(input, 1), :, 1, size(input, 3))), dims=2)
kfn.zit .= reshape(_zit, (size(input, 1), :, size(input, 3)))
# project 3D kfn zit into 4D on zit
i1, i2, i3, i4 = size(kfn.on_zit)
kfn.on_zit .= reshape(kfn.zit, (i1, i2, 1, i4)) .* kfn.on_arrayProjection4d
# read out
onForward( kfn.zit,
kfn.on_zit,
kfn.on_wOut,
kfn.on_vt0,
kfn.on_vt1,
onForward( kfn.on_zit,
kfn.on_wOut,
kfn.on_vt,
kfn.on_vth,
kfn.on_vRest,
kfn.on_zt1,
kfn.on_zt4d,
kfn.on_alpha,
kfn.on_phi,
kfn.on_epsilonRec,
@@ -118,16 +115,11 @@ function (kfn::kfn_1)(input::AbstractArray)
kfn.on_refractoryDuration,
kfn.on_gammaPd,
kfn.on_firingCounter,
kfn.on_arrayProjection3DTo4D,
kfn.on_recSignal,
kfn.on_decayed_vt0,
kfn.on_decayed_epsilonRec,
kfn.on_vt1_diff_vth,
kfn.on_vt1_diff_vth_div_vth,
kfn.on_gammaPd_div_vth,
kfn.on_phiActivation)
kfn.on_recSignal,)
# error("DEBUG -> kfn forward")
logit = reshape(kfn.on_zt, (size(input, 1), :))
return reshape(kfn.on_zt1, (size(input, 1), :)),
return logit,
kfn.zit
end
@@ -147,7 +139,7 @@ function lifForward(kfn_zit::Array{T},
refractoryDuration::Array{T},
gammaPd::Array{T},
firingCounter::Array{T},
arrayProjection3DTo4D::Array{T},
arrayProjection4d::Array{T},
recSignal::Array{T},
decayed_vt0::Array{T},
decayed_epsilonRec::Array{T},
@@ -158,8 +150,8 @@ function lifForward(kfn_zit::Array{T},
) where T<:Number
# project 3D kfn zit into 4D lif zit
zit .= reshape(kfn_zit,
(size(wRec, 1), size(wRec, 2), 1, size(wRec, 4))) .* arrayProjection3DTo4D
i1, i2, i3, i4 = size(alif_wRec)
lif_zit .= reshape(kfn_zit, (i1, i2, 1, i4)) .* lif_arrayProjection4d
for j in 1:size(wRec, 4), i in 1:size(wRec, 3) # compute along neurons axis of every batch
if sum(@view(refractoryCounter[:,:,i,j])) > 0 # refractory period is active
@@ -199,8 +191,128 @@ function lifForward(kfn_zit::Array{T},
end
end
function alifForward(kfn_zit::Array{T},
zit::Array{T},
# gpu launcher
#
# Configure and launch the LIF forward kernel on the GPU. The kernel is first
# compiled without launching (`launch=false`) so the occupancy API can report
# its limits; we then launch one thread per element of `lif_wRec` (surplus
# threads are idled by the bounds guard inside the kernel) and block until
# the kernel completes.
function lifForward(lif_zit::CuArray,
                    lif_wRec::CuArray,
                    lif_vt::CuArray,
                    lif_vth::CuArray,
                    lif_vRest::CuArray,
                    lif_zt::CuArray,
                    lif_alpha::CuArray,
                    lif_phi::CuArray,
                    lif_epsilonRec::CuArray,
                    lif_refractoryCounter::CuArray,
                    lif_refractoryDuration::CuArray,
                    lif_gammaPd::CuArray,
                    lif_firingCounter::CuArray,
                    lif_recSignal::CuArray)
    # Compile only — lets us query launch limits before the real launch.
    cuda_kernel = @cuda launch=false lifForward(lif_zit, lif_wRec, lif_vt,
                                                lif_vth, lif_vRest, lif_zt,
                                                lif_alpha, lif_phi,
                                                lif_epsilonRec,
                                                lif_refractoryCounter,
                                                lif_refractoryDuration,
                                                lif_gammaPd, lif_firingCounter,
                                                lif_recSignal,
                                                GeneralUtils.linear_to_cartesian)
    cfg = launch_configuration(cuda_kernel.fun)
    # One can't launch the exact thread count the kernel needs, so launch at
    # least that many and let the in-kernel guard keep spares off memory.
    nthreads = min(1024, cfg.threads) # most NVIDIA GPUs: 1024 threads per block
    nblocks = cld(length(lif_wRec), nthreads) # 1 thread per matrix element
    CUDA.@sync begin
        cuda_kernel(lif_zit, lif_wRec, lif_vt, lif_vth, lif_vRest, lif_zt,
                    lif_alpha, lif_phi, lif_epsilonRec, lif_refractoryCounter,
                    lif_refractoryDuration, lif_gammaPd, lif_firingCounter,
                    lif_recSignal, GeneralUtils.linear_to_cartesian;
                    threads = nthreads, blocks = nblocks)
    end
end
# gpu kernel
#
# One GPU thread per element of `wRec` advances one LIF timestep in place:
# decay the membrane potential, integrate recurrent input, spike on threshold
# crossing, and update the pseudo-derivative `phi` and eligibility trace
# `epsilonRec`.
#
# FIX(review): `refractoryCounter[i]` was decremented unconditionally before
# the refractory test AND again inside the refractory branch, so the
# refractory period elapsed at double speed and the counter drifted negative
# forever once expired. It now ticks down exactly once per step while the
# neuron is refractory, matching the CPU path, which tests `> 0` first.
function lifForward(zit,
                    wRec,
                    vt,
                    vth,
                    vRest,
                    zt,
                    alpha,
                    phi,
                    epsilonRec,
                    refractoryCounter,
                    refractoryDuration,
                    gammaPd,
                    firingCounter,
                    recSignal,
                    linear_to_cartesian)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global thread index
    if i <= length(wRec) # guard: surplus threads past the array do nothing
        # cartesian index of this element inside the 4D array
        i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
        # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
        if refractoryCounter[i] > 0 # refractory period is active
            refractoryCounter[i] -= 1
            zt[i] = 0
            vt[i] = alpha[i] * vt[i] # leak only; no input integrated
            phi[i] = 0
            # compute epsilonRec
            epsilonRec[i] = (alpha[i] * epsilonRec[i]) + zit[i]
        else # refractory period is inactive
            recSignal[i] = zit[i] * wRec[i]
            # NOTE(review): every thread of slab (i3, i4) re-reads the whole
            # slab's recSignal, which is written by other threads of this same
            # launch — TODO confirm this ordering is safe on-device.
            vt[i] = (alpha[i] * vt[i]) + sum(@view(recSignal[:, :, i3, i4]))
            # fires if membrane potential exceeds threshold
            if vt[i] > vth[i]
                zt[i] = 1
                refractoryCounter[i] = refractoryDuration[i]
                firingCounter[i] += 1
                vt[i] = vRest[i]
            else
                zt[i] = 0
            end
            # pseudo-derivative phi (surrogate gradient)
            phi[i] = (gammaPd[i] / vth[i]) * max(0, 1 - ((vt[i] - vth[i]) / vth[i]))
            # compute epsilonRec
            epsilonRec[i] = (alpha[i] * epsilonRec[i]) + zit[i]
        end
    end
    return nothing
end
function alifForward(zit::Array{T},
wRec::Array{T},
vt0::Array{T},
vt1::Array{T},
@@ -214,7 +326,6 @@ function alifForward(kfn_zit::Array{T},
refractoryDuration::Array{T},
gammaPd::Array{T},
firingCounter::Array{T},
arrayProjection3DTo4D::Array{T},
recSignal::Array{T},
decayed_vt0::Array{T},
decayed_epsilonRec::Array{T},
@@ -234,11 +345,6 @@ function alifForward(kfn_zit::Array{T},
rho_div_phi_x_beta_x_epsilonRecA::Array{T},
beta_x_a::Array{T},
) where T<:Number
# project 3D kfn zit into 4D lif zit
zit .= reshape(kfn_zit,
(size(wRec, 1), size(wRec, 2), 1, size(wRec, 4))) .* arrayProjection3DTo4D
for j in 1:size(wRec, 4), i in 1:size(wRec, 3) # compute along neurons axis of every batch
if sum(@view(refractoryCounter[:,:,i,j])) > 0 # refractory period is active
@@ -305,6 +411,164 @@ function alifForward(kfn_zit::Array{T},
end
end
# gpu launcher
#
# Configure and launch the ALIF (adaptive-threshold LIF) forward kernel on
# the GPU. The kernel is compiled without launching (`launch=false`) so the
# occupancy API can size the launch; we then run it synchronously with one
# thread per element of `alif_wRec` (surplus threads are idled by the bounds
# guard inside the kernel).
function alifForward(alif_zit::CuArray,
                     alif_wRec::CuArray,
                     alif_vt::CuArray,
                     alif_vth::CuArray,
                     alif_vRest::CuArray,
                     alif_zt::CuArray,
                     alif_alpha::CuArray,
                     alif_phi::CuArray,
                     alif_epsilonRec::CuArray,
                     alif_refractoryCounter::CuArray,
                     alif_refractoryDuration::CuArray,
                     alif_gammaPd::CuArray,
                     alif_firingCounter::CuArray,
                     alif_recSignal::CuArray,
                     alif_epsilonRecA::CuArray,
                     alif_a::CuArray,
                     alif_avth::CuArray,
                     alif_beta::CuArray,
                     alif_rho::CuArray)
    # Compile only — lets us query launch limits before the real launch.
    cuda_kernel = @cuda launch=false alifForward(alif_zit, alif_wRec, alif_vt,
                                                 alif_vth, alif_vRest, alif_zt,
                                                 alif_alpha, alif_phi,
                                                 alif_epsilonRec,
                                                 alif_refractoryCounter,
                                                 alif_refractoryDuration,
                                                 alif_gammaPd,
                                                 alif_firingCounter,
                                                 alif_recSignal,
                                                 alif_epsilonRecA, alif_a,
                                                 alif_avth, alif_beta, alif_rho,
                                                 GeneralUtils.linear_to_cartesian)
    cfg = launch_configuration(cuda_kernel.fun)
    # One can't launch the exact thread count the kernel needs, so launch at
    # least that many and let the in-kernel guard keep spares off memory.
    nthreads = min(1024, cfg.threads) # most NVIDIA GPUs: 1024 threads per block
    nblocks = cld(length(alif_wRec), nthreads) # 1 thread per matrix element
    CUDA.@sync begin
        cuda_kernel(alif_zit, alif_wRec, alif_vt, alif_vth, alif_vRest,
                    alif_zt, alif_alpha, alif_phi, alif_epsilonRec,
                    alif_refractoryCounter, alif_refractoryDuration,
                    alif_gammaPd, alif_firingCounter, alif_recSignal,
                    alif_epsilonRecA, alif_a, alif_avth, alif_beta, alif_rho,
                    GeneralUtils.linear_to_cartesian;
                    threads = nthreads, blocks = nblocks)
    end
end
# gpu kernel
#
# One GPU thread per element of `wRec` advances one ALIF timestep in place.
# Same as the LIF kernel, plus an adaptive threshold: the adaptation variable
# `a` decays with `rho` and jumps on a spike, the effective threshold is
# `avth = vth + beta * a`, and a second eligibility trace `epsilonRecA` is
# maintained for the adaptation dynamics.
#
# FIX(review): `refractoryCounter[i]` was decremented unconditionally before
# the refractory test AND again inside the refractory branch, so the
# refractory period elapsed at double speed and the counter drifted negative
# forever once expired. It now ticks down exactly once per step while the
# neuron is refractory, matching the CPU path, which tests `> 0` first.
function alifForward(zit,
                     wRec,
                     vt,
                     vth,
                     vRest,
                     zt,
                     alpha,
                     phi,
                     epsilonRec,
                     refractoryCounter,
                     refractoryDuration,
                     gammaPd,
                     firingCounter,
                     recSignal,
                     epsilonRecA,
                     a,
                     avth,
                     beta,
                     rho,
                     linear_to_cartesian)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global thread index
    if i <= length(wRec) # guard: surplus threads past the array do nothing
        # cartesian index of this element inside the 4D array
        i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
        # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
        if refractoryCounter[i] > 0 # refractory period is active
            refractoryCounter[i] -= 1
            zt[i] = 0
            vt[i] = alpha[i] * vt[i] # leak only; no input integrated
            phi[i] = 0
            a[i] = rho[i] * a[i] # adaptation decays during refractory
            # compute epsilonRec
            epsilonRec[i] = (alpha[i] * epsilonRec[i]) + zit[i]
            # compute epsilonRecA (phi is 0 here, so this reduces to decay)
            epsilonRecA[i] = (phi[i] * epsilonRec[i]) +
                             ((rho[i] - (phi[i] * beta[i])) * epsilonRecA[i])
            # compute adaptive threshold avth
            avth[i] = vth[i] + (beta[i] * a[i])
        else # refractory period is inactive
            recSignal[i] = zit[i] * wRec[i]
            # NOTE(review): every thread of slab (i3, i4) re-reads the whole
            # slab's recSignal, which is written by other threads of this same
            # launch — TODO confirm this ordering is safe on-device.
            vt[i] = (alpha[i] * vt[i]) + sum(@view(recSignal[:, :, i3, i4]))
            # compute adaptive threshold avth
            avth[i] = vth[i] + (beta[i] * a[i])
            # fires if membrane potential exceeds the adaptive threshold
            if vt[i] > avth[i]
                zt[i] = 1
                refractoryCounter[i] = refractoryDuration[i]
                firingCounter[i] += 1
                vt[i] = vRest[i]
                a[i] = (rho[i] * a[i]) + 1 # spike bumps adaptation
            else
                zt[i] = 0
                a[i] = (rho[i] * a[i])
            end
            # pseudo-derivative phi — NOTE(review): uses the static `vth`, not
            # the adaptive `avth`; presumably intentional — TODO confirm.
            phi[i] = (gammaPd[i] / vth[i]) * max(0, 1 - ((vt[i] - vth[i]) / vth[i]))
            # compute epsilonRec
            epsilonRec[i] = (alpha[i] * epsilonRec[i]) + zit[i]
            # compute epsilonRecA
            epsilonRecA[i] = (phi[i] * epsilonRec[i]) +
                             ((rho[i] - (phi[i] * beta[i])) * epsilonRecA[i])
        end
    end
    return nothing
end
function onForward(kfn_zit::Array{T},
zit::Array{T},
wOut::Array{T},
@@ -320,7 +584,7 @@ function onForward(kfn_zit::Array{T},
refractoryDuration::Array{T},
gammaPd::Array{T},
firingCounter::Array{T},
arrayProjection3DTo4D::Array{T},
arrayProjection4d::Array{T},
recSignal::Array{T},
decayed_vt0::Array{T},
decayed_epsilonRec::Array{T},
@@ -332,7 +596,7 @@ function onForward(kfn_zit::Array{T},
# project 3D kfn zit into 4D lif zit
zit .= reshape(kfn_zit,
(size(wOut, 1), size(wOut, 2), 1, size(wOut, 4))) .* arrayProjection3DTo4D
(size(wOut, 1), size(wOut, 2), 1, size(wOut, 4))) .* arrayProjection4d
for j in 1:size(wOut, 4), i in 1:size(wOut, 3) # compute along neurons axis of every batch
if sum(@view(refractoryCounter[:,:,i,j])) > 0 # refractory period is active
@@ -372,57 +636,125 @@ function onForward(kfn_zit::Array{T},
end
end
# function onForward(kfn_zit,
# zit,
# wOut,
# vt0,
# vt1,
# vth,
# vRest,
# zt1,
# alpha,
# phi,
# epsilonRec,
# refractoryCounter,
# refractoryDuration,
# gammaPd,
# firingCounter)
# d1, d2, d3, d4 = size(wOut)
# zit .= reshape(kfn_zit, (d1, d2, 1, d4)) .* ones(size(wOut)...) # project zit into zit
# gpu launcher
#
# Configure and launch the output-neuron forward kernel on the GPU. The
# kernel is compiled without launching (`launch=false`) so the occupancy API
# can size the launch; we then run it synchronously with one thread per
# element of `on_wOut` (surplus threads are idled by the bounds guard inside
# the kernel).
#
# FIX(review): removed the 30+ lines of commented-out legacy CPU code that
# were interleaved in the middle of this launcher (between the occupancy
# query and the thread-count computation); it was dead and obscured the
# launch logic.
function onForward(on_zit::CuArray,
                   on_wOut::CuArray,
                   on_vt::CuArray,
                   on_vth::CuArray,
                   on_vRest::CuArray,
                   on_zt::CuArray,
                   on_alpha::CuArray,
                   on_phi::CuArray,
                   on_epsilonRec::CuArray,
                   on_refractoryCounter::CuArray,
                   on_refractoryDuration::CuArray,
                   on_gammaPd::CuArray,
                   on_firingCounter::CuArray,
                   on_recSignal::CuArray)
    # Compile only — lets us query launch limits before the real launch.
    kernel = @cuda launch=false onForward(on_zit,
                                          on_wOut,
                                          on_vt,
                                          on_vth,
                                          on_vRest,
                                          on_zt,
                                          on_alpha,
                                          on_phi,
                                          on_epsilonRec,
                                          on_refractoryCounter,
                                          on_refractoryDuration,
                                          on_gammaPd,
                                          on_firingCounter,
                                          on_recSignal,
                                          GeneralUtils.linear_to_cartesian)
    config = launch_configuration(kernel.fun)
    # One can't launch the exact thread count the kernel needs, so launch at
    # least that many and let the in-kernel guard keep spares off memory.
    threads = min(1024, config.threads) # most NVIDIA GPUs: 1024 threads per block
    # total desired threads: 1 thread per matrix element
    totalThreads = length(on_wOut)
    blocks = cld(totalThreads, threads)
    CUDA.@sync begin
        kernel(on_zit,
               on_wOut,
               on_vt,
               on_vth,
               on_vRest,
               on_zt,
               on_alpha,
               on_phi,
               on_epsilonRec,
               on_refractoryCounter,
               on_refractoryDuration,
               on_gammaPd,
               on_firingCounter,
               on_recSignal,
               GeneralUtils.linear_to_cartesian; threads, blocks)
    end
end
# gpu kernel
#
# One GPU thread per element of `wOut` advances one output-neuron timestep in
# place: decay the membrane potential, integrate the weighted input, spike on
# threshold crossing, and update the pseudo-derivative `phi` and eligibility
# trace `epsilonRec`. Same dynamics as the LIF kernel but driven by the
# output weights `wOut`.
#
# FIX(review): `refractoryCounter[i]` was decremented unconditionally before
# the refractory test AND again inside the refractory branch, so the
# refractory period elapsed at double speed and the counter drifted negative
# forever once expired. It now ticks down exactly once per step while the
# neuron is refractory, matching the CPU path, which tests `> 0` first.
function onForward(zit,
                   wOut,
                   vt,
                   vth,
                   vRest,
                   zt,
                   alpha,
                   phi,
                   epsilonRec,
                   refractoryCounter,
                   refractoryDuration,
                   gammaPd,
                   firingCounter,
                   recSignal,
                   linear_to_cartesian)
    i = (blockIdx().x - 1) * blockDim().x + threadIdx().x # global thread index
    if i <= length(wOut) # guard: surplus threads past the array do nothing
        # cartesian index of this element inside the 4D array
        i1, i2, i3, i4 = linear_to_cartesian(i, size(wOut))
        # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
        if refractoryCounter[i] > 0 # refractory period is active
            refractoryCounter[i] -= 1
            zt[i] = 0
            vt[i] = alpha[i] * vt[i] # leak only; no input integrated
            phi[i] = 0
            # compute epsilonRec
            epsilonRec[i] = (alpha[i] * epsilonRec[i]) + zit[i]
        else # refractory period is inactive
            recSignal[i] = zit[i] * wOut[i]
            # NOTE(review): every thread of slab (i3, i4) re-reads the whole
            # slab's recSignal, which is written by other threads of this same
            # launch — TODO confirm this ordering is safe on-device.
            vt[i] = (alpha[i] * vt[i]) + sum(@view(recSignal[:, :, i3, i4]))
            # fires if membrane potential exceeds threshold
            if vt[i] > vth[i]
                zt[i] = 1
                refractoryCounter[i] = refractoryDuration[i]
                firingCounter[i] += 1
                vt[i] = vRest[i]
            else
                zt[i] = 0
            end
            # pseudo-derivative phi (surrogate gradient)
            phi[i] = (gammaPd[i] / vth[i]) * max(0, 1 - ((vt[i] - vth[i]) / vth[i]))
            # compute epsilonRec
            epsilonRec[i] = (alpha[i] * epsilonRec[i]) + zit[i]
        end
    end
    return nothing
end