diff --git a/src/forward.jl b/src/forward.jl
index 20fb70b..d5f9498 100644
--- a/src/forward.jl
+++ b/src/forward.jl
@@ -59,7 +59,9 @@ function (kfn::kfn_1)(input::AbstractArray)
                                 kfn.lif_refractoryDuration,
                                 kfn.lif_gammaPd,
                                 kfn.lif_firingCounter,
-                                kfn.lif_recSignal,)
+                                kfn.lif_recSignal,
+                                kfn.lif_subscription,
+                                )
         end
         @async begin
                 # project 3D kfn zit into 4D alif zit
@@ -80,6 +82,7 @@ function (kfn::kfn_1)(input::AbstractArray)
                                 kfn.alif_gammaPd,
                                 kfn.alif_firingCounter,
                                 kfn.alif_recSignal,
+                                kfn.alif_subscription,
                                 kfn.alif_epsilonRecA,
                                 kfn.alif_a,
                                 kfn.alif_avth,
@@ -117,7 +120,9 @@ function (kfn::kfn_1)(input::AbstractArray)
                 kfn.on_refractoryDuration,
                 kfn.on_gammaPd,
                 kfn.on_firingCounter,
-                kfn.on_recSignal,)
+                kfn.on_recSignal,
+                kfn.on_subscription,
+                )
     
     logit = reshape(kfn.on_zt, (size(input, 1), :))
 
@@ -126,6 +131,434 @@ function (kfn::kfn_1)(input::AbstractArray)
             kfn.zit
 end
 
+# gpu launcher
+function lifForward(    zit::CuArray,
+                        wRec::CuArray,
+                        vt::CuArray,
+                        vth::CuArray,
+                        vRest::CuArray,
+                        zt::CuArray,
+                        alpha::CuArray,
+                        phi::CuArray,
+                        epsilonRec::CuArray,
+                        refractoryCounter::CuArray,
+                        refractoryDuration::CuArray,
+                        gammaPd::CuArray,
+                        firingCounter::CuArray,
+                        recSignal::CuArray,
+                        subscription::CuArray,
+                        )
+    
+    kernel = @cuda launch=false lifForward(     zit,
+                                                wRec,
+                                                vt,
+                                                vth,
+                                                vRest,
+                                                zt,
+                                                alpha,
+                                                phi,
+                                                epsilonRec,
+                                                refractoryCounter,
+                                                refractoryDuration,
+                                                gammaPd,
+                                                firingCounter,
+                                                recSignal,
+                                                subscription,
+                                                GeneralUtils.linear_to_cartesian,
+                                                )
+    config = launch_configuration(kernel.fun)
+    
+    
+    # threads to be launched. Since one can't launch exact thread number the kernel needs, 
+    # one just launch threads more than this kernel needs then use a guard inside the kernel
+    # to prevent unused threads to access memory. 
+    threads = min(1024, config.threads)    # depend on gpu. Most NVIDIA gpu has 1024 threads per block
+
+    # total desired threads to launch to gpu. Usually 1 thread per 1 matrix element 
+    totalThreads = length(wRec)   
+ 
+    blocks = cld(totalThreads, threads)
+        # println("launching gpu kernel")
+    CUDA.@sync begin
+        kernel( zit,
+                wRec,
+                vt,
+                vth,
+                vRest,
+                zt,
+                alpha,
+                phi,
+                epsilonRec,
+                refractoryCounter,
+                refractoryDuration,
+                gammaPd,
+                firingCounter,
+                recSignal,
+                subscription,
+                GeneralUtils.linear_to_cartesian;  threads, blocks)
+    end
+end
+
+# gpu kernel
+function lifForward(    zit,
+                        wRec,
+                        vt,
+                        vth,
+                        vRest,
+                        zt,
+                        alpha,
+                        phi,
+                        epsilonRec,
+                        refractoryCounter,
+                        refractoryDuration,
+                        gammaPd,
+                        firingCounter,
+                        recSignal,
+                        subscription,
+                        linear_to_cartesian,
+                        )
+        i = (blockIdx().x - 1) * blockDim().x + threadIdx().x   # gpu threads index
+
+        if i <= length(wRec)
+                # cartesian index
+                i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
+                # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
+
+                if refractoryCounter[i1,i2,i3,i4] > 0        # refractory period is active
+                        refractoryCounter[i1,i2,i3,i4] -= 1
+                        recSignal[i1,i2,i3,i4] = 0
+                        zt[i1,i2,i3,i4] = 0
+                        vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
+                        phi[i1,i2,i3,i4] = 0
+                        
+                        # compute epsilonRec
+                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) +
+                                                        (zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
+
+                else    # refractory period is inactive
+                        recSignal[i1,i2,i3,i4] = wRec[i1,i2,i3,i4] * zit[i1,i2,i3,i4]
+                        vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) + 
+                                                sum(@view(recSignal[:,:,i3,i4]))
+
+                        # fires if membrane potential exceed threshold
+                        if vt[i1,i2,i3,i4] > vth[i1,i2,i3,i4]
+                                zt[i1,i2,i3,i4] = 1
+                                refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
+                                firingCounter[i1,i2,i3,i4] += 1
+                                vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
+                        else
+                                zt[i1,i2,i3,i4] = 0
+                        end
+
+                        # compute phi, there is a difference from lif formula
+                        phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) * 
+                                max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))
+
+                        # compute epsilonRec
+                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) +
+                                                        (zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
+                end
+        end
+        return nothing
+end
+
+# gpu launcher
+function alifForward(   zit::CuArray,
+                        wRec::CuArray,
+                        vt::CuArray,
+                        vth::CuArray,
+                        vRest::CuArray,
+                        zt::CuArray,
+                        alpha::CuArray,
+                        phi::CuArray,
+                        epsilonRec::CuArray,
+                        refractoryCounter::CuArray,
+                        refractoryDuration::CuArray,
+                        gammaPd::CuArray,
+                        firingCounter::CuArray,
+                        recSignal::CuArray,
+                        subscription::CuArray,
+                        epsilonRecA::CuArray,
+                        a::CuArray,
+                        avth::CuArray,
+                        beta::CuArray,
+                        rho::CuArray,
+                        )
+
+    kernel = @cuda launch=false alifForward(    zit,
+                                                wRec, 
+                                                vt,
+                                                vth,
+                                                vRest,
+                                                zt,
+                                                alpha,
+                                                phi,
+                                                epsilonRec,
+                                                refractoryCounter,
+                                                refractoryDuration,
+                                                gammaPd,
+                                                firingCounter,
+                                                recSignal,
+                                                subscription,
+                                                epsilonRecA,
+                                                a,
+                                                avth,
+                                                beta,
+                                                rho,
+                                                GeneralUtils.linear_to_cartesian,
+                                                )
+    config = launch_configuration(kernel.fun)
+    
+    # threads to be launched. Since one can't launch exact thread number the kernel needs, 
+    # one just launch threads more than this kernel needs then use a guard inside the kernel
+    # to prevent unused threads to access memory. 
+    threads = min(1024, config.threads)    # depend on gpu. Most NVIDIA gpu has 1024 threads per block
+
+    # total desired threads to launch to gpu. Usually 1 thread per 1 matrix element 
+    totalThreads = length(wRec)   
+ 
+    blocks = cld(totalThreads, threads)
+        # println("launching gpu kernel")
+    CUDA.@sync begin
+        kernel( zit,
+                wRec, 
+                vt,
+                vth,
+                vRest,
+                zt,
+                alpha,
+                phi,
+                epsilonRec,
+                refractoryCounter,
+                refractoryDuration,
+                gammaPd,
+                firingCounter,
+                recSignal,
+                subscription,
+                epsilonRecA,
+                a,
+                avth,
+                beta,
+                rho,
+                GeneralUtils.linear_to_cartesian;  threads, blocks)
+    end
+end
+
+# gpu kernel
+function alifForward(   zit,
+                        wRec, 
+                        vt,
+                        vth,
+                        vRest,
+                        zt,
+                        alpha,
+                        phi,
+                        epsilonRec,
+                        refractoryCounter,
+                        refractoryDuration,
+                        gammaPd,
+                        firingCounter,
+                        recSignal,
+                        subscription,
+                        epsilonRecA,
+                        a,
+                        avth,
+                        beta,
+                        rho,
+                        linear_to_cartesian,
+                        )
+        i = (blockIdx().x - 1) * blockDim().x + threadIdx().x   # gpu threads index
+
+        if i <= length(wRec)
+                # cartesian index
+                i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
+                # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
+
+                if refractoryCounter[i1,i2,i3,i4] > 0        # refractory period is active
+                        refractoryCounter[i1,i2,i3,i4] -= 1
+                        recSignal[i1,i2,i3,i4] = 0
+                        zt[i1,i2,i3,i4] = 0
+                        vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
+                        phi[i1,i2,i3,i4] = 0
+                        a[i1,i2,i3,i4] = rho[i1,i2,i3,i4] * a[i1,i2,i3,i4]
+                        
+                        # compute epsilonRec
+                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + 
+                                                        (zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
+
+                        # compute epsilonRecA
+                        epsilonRecA[i1,i2,i3,i4] = (phi[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + 
+                                        ((rho[i1,i2,i3,i4] - (phi[i1,i2,i3,i4] * beta[i1,i2,i3,i4])) * 
+                                        epsilonRecA[i1,i2,i3,i4])
+
+                        # compute avth
+                        avth[i1,i2,i3,i4] = vth[i1,i2,i3,i4] + (beta[i1,i2,i3,i4] * a[i1,i2,i3,i4])
+
+                else    # refractory period is inactive
+                        recSignal[i1,i2,i3,i4] = zit[i1,i2,i3,i4] * wRec[i1,i2,i3,i4]
+                        vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) + 
+                                                sum(@view(recSignal[:,:,i3,i4]))
+                        
+                        # compute avth
+                        avth[i1,i2,i3,i4] = vth[i1,i2,i3,i4] + (beta[i1,i2,i3,i4] * a[i1,i2,i3,i4])
+
+                        # fires if membrane potential exceed threshold
+                        if vt[i1,i2,i3,i4] > avth[i1,i2,i3,i4]
+                                zt[i1,i2,i3,i4] = 1
+                                refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
+                                firingCounter[i1,i2,i3,i4] += 1
+                                vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
+                                a[i1,i2,i3,i4] = (rho[i1,i2,i3,i4] * a[i1,i2,i3,i4]) + 1
+                        else
+                                zt[i1,i2,i3,i4] = 0
+                                a[i1,i2,i3,i4] = (rho[i1,i2,i3,i4] * a[i1,i2,i3,i4])
+                        end
+
+                        # compute phi, there is a difference from alif formula
+                        phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) * 
+                                max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))
+
+                        # compute epsilonRec
+                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + 
+                                                        (zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
+
+                        # compute epsilonRecA   use eq.26
+                        epsilonRecA[i1,i2,i3,i4] = (rho[i1,i2,i3,i4] * 
+                                                        (phi[i1,i2,i3,i4] * epsilonRecA[i1,i2,i3,i4])) + 
+                                                        (zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
+                end
+        end
+        return nothing
+end
+
+# gpu launcher
+function onForward(     zit::CuArray,
+                        wOut::CuArray,
+                        vt::CuArray,
+                        vth::CuArray,
+                        vRest::CuArray,
+                        zt::CuArray,
+                        alpha::CuArray,
+                        phi::CuArray,
+                        epsilonRec::CuArray,
+                        refractoryCounter::CuArray,
+                        refractoryDuration::CuArray,
+                        gammaPd::CuArray,
+                        firingCounter::CuArray,
+                        recSignal::CuArray,
+                        subscription::CuArray,
+                        )
+                        
+    kernel = @cuda launch=false onForward(      zit,
+                                                wOut,
+                                                vt,
+                                                vth,
+                                                vRest,
+                                                zt,
+                                                alpha,
+                                                phi,
+                                                epsilonRec,
+                                                refractoryCounter,
+                                                refractoryDuration,
+                                                gammaPd,
+                                                firingCounter,
+                                                recSignal,
+                                                subscription,
+                                                GeneralUtils.linear_to_cartesian,
+                                                )
+    config = launch_configuration(kernel.fun)
+    
+    # threads to be launched. Since one can't launch exact thread number the kernel needs, 
+    # one just launch threads more than this kernel needs then use a guard inside the kernel
+    # to prevent unused threads to access memory. 
+    threads = min(1024, config.threads)    # depend on gpu. Most NVIDIA gpu has 1024 threads per block
+
+    # total desired threads to launch to gpu. Usually 1 thread per 1 matrix element 
+    totalThreads = length(wOut)   
+ 
+    blocks = cld(totalThreads, threads)
+        # println("launching gpu kernel")
+    CUDA.@sync begin
+        kernel( zit,
+                wOut,
+                vt,
+                vth,
+                vRest,
+                zt,
+                alpha,
+                phi,
+                epsilonRec,
+                refractoryCounter,
+                refractoryDuration,
+                gammaPd,
+                firingCounter,
+                recSignal,
+                subscription,
+                GeneralUtils.linear_to_cartesian;  threads, blocks)
+    end
+end
+
+# gpu kernel
+function onForward(     zit,
+                        wOut,
+                        vt,
+                        vth,
+                        vRest,
+                        zt,
+                        alpha,
+                        phi,
+                        epsilonRec,
+                        refractoryCounter,
+                        refractoryDuration,
+                        gammaPd,
+                        firingCounter,
+                        recSignal,
+                        subscription,
+                        linear_to_cartesian,
+                        )
+        i = (blockIdx().x - 1) * blockDim().x + threadIdx().x   # gpu threads index
+
+        if i <= length(wOut)
+                # cartesian index
+                i1, i2, i3, i4 = linear_to_cartesian(i, size(wOut))
+                # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
+
+                if refractoryCounter[i1,i2,i3,i4] > 0        # refractory period is active
+                        refractoryCounter[i1,i2,i3,i4] -= 1
+                        recSignal[i1,i2,i3,i4] = 0
+                        zt[i1,i2,i3,i4] = 0
+                        vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
+                        phi[i1,i2,i3,i4] = 0
+                        
+                        # compute epsilonRec
+                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + 
+                                                        (zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
+
+                else    # refractory period is inactive
+                        recSignal[i1,i2,i3,i4] = zit[i1,i2,i3,i4] * wOut[i1,i2,i3,i4]
+                        vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) + sum(@view(recSignal[:,:,i3,i4]))
+
+                        # fires if membrane potential exceed threshold
+                        if vt[i1,i2,i3,i4] > vth[i1,i2,i3,i4]
+                                zt[i1,i2,i3,i4] = 1
+                                refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
+                                firingCounter[i1,i2,i3,i4] += 1
+                                vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
+                        else
+                                zt[i1,i2,i3,i4] = 0
+                        end
+
+                        # compute phi, there is a difference from on formula
+                        phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) * max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))
+
+                        # compute epsilonRec
+                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + 
+                                                        (zit[i1,i2,i3,i4] * subscription[i1,i2,i3,i4])
+                end
+        end
+        return nothing
+end
+
 function lifForward(kfn_zit::Array{T}, 
                     zit::Array{T},
                     wRec::Array{T}, 
@@ -193,127 +626,6 @@ function lifForward(kfn_zit::Array{T},
     end
 end
 
-# gpu launcher
-function lifForward(    lif_zit::CuArray,
-                        lif_wRec::CuArray,
-                        lif_vt::CuArray,
-                        lif_vth::CuArray,
-                        lif_vRest::CuArray,
-                        lif_zt::CuArray,
-                        lif_alpha::CuArray,
-                        lif_phi::CuArray,
-                        lif_epsilonRec::CuArray,
-                        lif_refractoryCounter::CuArray,
-                        lif_refractoryDuration::CuArray,
-                        lif_gammaPd::CuArray,
-                        lif_firingCounter::CuArray,
-                        lif_recSignal::CuArray,)
-    
-    kernel = @cuda launch=false lifForward(     lif_zit,
-                                                lif_wRec,
-                                                lif_vt,
-                                                lif_vth,
-                                                lif_vRest,
-                                                lif_zt,
-                                                lif_alpha,
-                                                lif_phi,
-                                                lif_epsilonRec,
-                                                lif_refractoryCounter,
-                                                lif_refractoryDuration,
-                                                lif_gammaPd,
-                                                lif_firingCounter,
-                                                lif_recSignal,
-                                                GeneralUtils.linear_to_cartesian)
-    config = launch_configuration(kernel.fun)
-    
-    
-    # threads to be launched. Since one can't launch exact thread number the kernel needs, 
-    # one just launch threads more than this kernel needs then use a guard inside the kernel
-    # to prevent unused threads to access memory. 
-    threads = min(1024, config.threads)    # depend on gpu. Most NVIDIA gpu has 1024 threads per block
-
-    # total desired threads to launch to gpu. Usually 1 thread per 1 matrix element 
-    totalThreads = length(lif_wRec)   
- 
-    blocks = cld(totalThreads, threads)
-        # println("launching gpu kernel")
-    CUDA.@sync begin
-        kernel( lif_zit,
-                lif_wRec,
-                lif_vt,
-                lif_vth,
-                lif_vRest,
-                lif_zt,
-                lif_alpha,
-                lif_phi,
-                lif_epsilonRec,
-                lif_refractoryCounter,
-                lif_refractoryDuration,
-                lif_gammaPd,
-                lif_firingCounter,
-                lif_recSignal,
-                GeneralUtils.linear_to_cartesian;  threads, blocks)
-    end
-end
-
-# gpu kernel
-function lifForward(    zit,
-                        wRec,
-                        vt,
-                        vth,
-                        vRest,
-                        zt,
-                        alpha,
-                        phi,
-                        epsilonRec,
-                        refractoryCounter,
-                        refractoryDuration,
-                        gammaPd,
-                        firingCounter,
-                        recSignal,
-                        linear_to_cartesian)
-        i = (blockIdx().x - 1) * blockDim().x + threadIdx().x   # gpu threads index
-
-        if i <= length(wRec)
-                # cartesian index
-                i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
-                # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
-
-                refractoryCounter[i1,i2,i3,i4] -= 1
-
-                if refractoryCounter[i1,i2,i3,i4] > 0        # refractory period is active
-                        refractoryCounter[i1,i2,i3,i4] -= 1
-                        zt[i1,i2,i3,i4] = 0
-                        vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
-                        phi[i1,i2,i3,i4] = 0
-                        
-                        # compute epsilonRec
-                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]
-
-                else    # refractory period is inactive
-                        recSignal[i1,i2,i3,i4] = zit[i1,i2,i3,i4] * wRec[i1,i2,i3,i4]
-                        vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) + sum(@view(recSignal[:,:,i3,i4]))
-
-                        # fires if membrane potential exceed threshold
-                        if vt[i1,i2,i3,i4] > vth[i1,i2,i3,i4]
-                                zt[i1,i2,i3,i4] = 1
-                                refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
-                                firingCounter[i1,i2,i3,i4] += 1
-                                vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
-                        else
-                                zt[i1,i2,i3,i4] = 0
-                        end
-
-                        # compute phi, there is a difference from lif formula
-                        phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) * max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))
-
-                        # compute epsilonRec
-                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]
-                end
-        end
-        return nothing
-end
-
 function alifForward(zit::Array{T},
                     wRec::Array{T},
                     vt0::Array{T},
@@ -413,164 +725,6 @@ function alifForward(zit::Array{T},
         end
 end
 
-# gpu launcher
-function alifForward(   alif_zit::CuArray,
-                        alif_wRec::CuArray,
-                        alif_vt::CuArray,
-                        alif_vth::CuArray,
-                        alif_vRest::CuArray,
-                        alif_zt::CuArray,
-                        alif_alpha::CuArray,
-                        alif_phi::CuArray,
-                        alif_epsilonRec::CuArray,
-                        alif_refractoryCounter::CuArray,
-                        alif_refractoryDuration::CuArray,
-                        alif_gammaPd::CuArray,
-                        alif_firingCounter::CuArray,
-                        alif_recSignal::CuArray,
-                        alif_epsilonRecA::CuArray,
-                        alif_a::CuArray,
-                        alif_avth::CuArray,
-                        alif_beta::CuArray,
-                        alif_rho::CuArray,
-                        )
-
-    kernel = @cuda launch=false alifForward(    alif_zit,
-                                                alif_wRec, 
-                                                alif_vt,
-                                                alif_vth,
-                                                alif_vRest,
-                                                alif_zt,
-                                                alif_alpha,
-                                                alif_phi,
-                                                alif_epsilonRec,
-                                                alif_refractoryCounter,
-                                                alif_refractoryDuration,
-                                                alif_gammaPd,
-                                                alif_firingCounter,
-                                                alif_recSignal,
-                                                alif_epsilonRecA,
-                                                alif_a,
-                                                alif_avth,
-                                                alif_beta,
-                                                alif_rho,
-                                                GeneralUtils.linear_to_cartesian)
-    config = launch_configuration(kernel.fun)
-    
-    # threads to be launched. Since one can't launch exact thread number the kernel needs, 
-    # one just launch threads more than this kernel needs then use a guard inside the kernel
-    # to prevent unused threads to access memory. 
-    threads = min(1024, config.threads)    # depend on gpu. Most NVIDIA gpu has 1024 threads per block
-
-    # total desired threads to launch to gpu. Usually 1 thread per 1 matrix element 
-    totalThreads = length(alif_wRec)   
- 
-    blocks = cld(totalThreads, threads)
-        # println("launching gpu kernel")
-    CUDA.@sync begin
-        kernel( alif_zit,
-                alif_wRec, 
-                alif_vt,
-                alif_vth,
-                alif_vRest,
-                alif_zt,
-                alif_alpha,
-                alif_phi,
-                alif_epsilonRec,
-                alif_refractoryCounter,
-                alif_refractoryDuration,
-                alif_gammaPd,
-                alif_firingCounter,
-                alif_recSignal,
-                alif_epsilonRecA,
-                alif_a,
-                alif_avth,
-                alif_beta,
-                alif_rho,
-                GeneralUtils.linear_to_cartesian;  threads, blocks)
-    end
-end
-
-# gpu kernel
-function alifForward(   zit,
-                        wRec, 
-                        vt,
-                        vth,
-                        vRest,
-                        zt,
-                        alpha,
-                        phi,
-                        epsilonRec,
-                        refractoryCounter,
-                        refractoryDuration,
-                        gammaPd,
-                        firingCounter,
-                        recSignal,
-                        epsilonRecA,
-                        a,
-                        avth,
-                        beta,
-                        rho,
-                        linear_to_cartesian)
-        i = (blockIdx().x - 1) * blockDim().x + threadIdx().x   # gpu threads index
-
-        if i <= length(wRec)
-                # cartesian index
-                i1, i2, i3, i4 = linear_to_cartesian(i, size(wRec))
-                # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
-
-                refractoryCounter[i1,i2,i3,i4] -= 1
-
-                if refractoryCounter[i1,i2,i3,i4] > 0        # refractory period is active
-                        refractoryCounter[i1,i2,i3,i4] -= 1
-                        zt[i1,i2,i3,i4] = 0
-                        vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
-                        phi[i1,i2,i3,i4] = 0
-                        a[i1,i2,i3,i4] = rho[i1,i2,i3,i4] * a[i1,i2,i3,i4]
-                        
-                        # compute epsilonRec
-                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]
-
-                        # compute epsilonRecA
-                        epsilonRecA[i1,i2,i3,i4] = (phi[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + 
-                                                ((rho[i1,i2,i3,i4] - (phi[i1,i2,i3,i4] * beta[i1,i2,i3,i4])) * epsilonRecA[i1,i2,i3,i4])
-
-                        # compute avth
-                        avth[i1,i2,i3,i4] = vth[i1,i2,i3,i4] + (beta[i1,i2,i3,i4] * a[i1,i2,i3,i4])
-
-                else    # refractory period is inactive
-                        recSignal[i1,i2,i3,i4] = zit[i1,i2,i3,i4] * wRec[i1,i2,i3,i4]
-                        vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) + sum(@view(recSignal[:,:,i3,i4]))
-                        
-                        # compute avth
-                        avth[i1,i2,i3,i4] = vth[i1,i2,i3,i4] + (beta[i1,i2,i3,i4] * a[i1,i2,i3,i4])
-
-                        # fires if membrane potential exceed threshold
-                        if vt[i1,i2,i3,i4] > avth[i1,i2,i3,i4]
-                                zt[i1,i2,i3,i4] = 1
-                                refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
-                                firingCounter[i1,i2,i3,i4] += 1
-                                vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
-                                a[i1,i2,i3,i4] = (rho[i1,i2,i3,i4] * a[i1,i2,i3,i4]) + 1
-                        else
-                                zt[i1,i2,i3,i4] = 0
-                                a[i1,i2,i3,i4] = (rho[i1,i2,i3,i4] * a[i1,i2,i3,i4])
-                        end
-
-                        # compute phi, there is a difference from alif formula
-                        phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) * max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))
-
-                        # compute epsilonRec
-                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]
-
-                        # compute epsilonRecA
-                        epsilonRecA[i1,i2,i3,i4] = (phi[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + 
-                                                ((rho[i1,i2,i3,i4] - (phi[i1,i2,i3,i4] * beta[i1,i2,i3,i4])) * epsilonRecA[i1,i2,i3,i4])
-                end
-        end
-        return nothing
-end
-
 function onForward(kfn_zit::Array{T},
                     zit::Array{T},
                     wOut::Array{T},
@@ -638,133 +792,6 @@ function onForward(kfn_zit::Array{T},
     end
 end
 
-# gpu launcher
-function onForward(     on_zit::CuArray,
-                        on_wOut::CuArray,
-                        on_vt::CuArray,
-                        on_vth::CuArray,
-                        on_vRest::CuArray,
-                        on_zt::CuArray,
-                        on_alpha::CuArray,
-                        on_phi::CuArray,
-                        on_epsilonRec::CuArray,
-                        on_refractoryCounter::CuArray,
-                        on_refractoryDuration::CuArray,
-                        on_gammaPd::CuArray,
-                        on_firingCounter::CuArray,
-                        on_recSignal::CuArray)
-                        
-    kernel = @cuda launch=false onForward(      on_zit,
-                                                on_wOut,
-                                                on_vt,
-                                                on_vth,
-                                                on_vRest,
-                                                on_zt,
-                                                on_alpha,
-                                                on_phi,
-                                                on_epsilonRec,
-                                                on_refractoryCounter,
-                                                on_refractoryDuration,
-                                                on_gammaPd,
-                                                on_firingCounter,
-                                                on_recSignal,
-                                                GeneralUtils.linear_to_cartesian)
-    config = launch_configuration(kernel.fun)
-    
-    # threads to be launched. Since one can't launch exact thread number the kernel needs, 
-    # one just launch threads more than this kernel needs then use a guard inside the kernel
-    # to prevent unused threads to access memory. 
-    threads = min(1024, config.threads)    # depend on gpu. Most NVIDIA gpu has 1024 threads per block
-
-    # total desired threads to launch to gpu. Usually 1 thread per 1 matrix element 
-    totalThreads = length(on_wOut)   
- 
-    blocks = cld(totalThreads, threads)
-        # println("launching gpu kernel")
-    CUDA.@sync begin
-        kernel( on_zit,
-                on_wOut,
-                on_vt,
-                on_vth,
-                on_vRest,
-                on_zt,
-                on_alpha,
-                on_phi,
-                on_epsilonRec,
-                on_refractoryCounter,
-                on_refractoryDuration,
-                on_gammaPd,
-                on_firingCounter,
-                on_recSignal,
-                GeneralUtils.linear_to_cartesian;  threads, blocks)
-    end
-end
-
-# gpu kernel
-function onForward(     zit,
-                        wOut,
-                        vt,
-                        vth,
-                        vRest,
-                        zt,
-                        alpha,
-                        phi,
-                        epsilonRec,
-                        refractoryCounter,
-                        refractoryDuration,
-                        gammaPd,
-                        firingCounter,
-                        recSignal,
-                        linear_to_cartesian)
-        i = (blockIdx().x - 1) * blockDim().x + threadIdx().x   # gpu threads index
-
-        if i <= length(wOut)
-                # cartesian index
-                i1, i2, i3, i4 = linear_to_cartesian(i, size(wOut))
-                # @cuprintln("gpu thread $i $i1 $i2 $i3 $i4")
-
-                refractoryCounter[i1,i2,i3,i4] -= 1
-
-                if refractoryCounter[i1,i2,i3,i4] > 0        # refractory period is active
-                        refractoryCounter[i1,i2,i3,i4] -= 1
-                        zt[i1,i2,i3,i4] = 0
-                        vt[i1,i2,i3,i4] = alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]
-                        phi[i1,i2,i3,i4] = 0
-                        
-                        # compute epsilonRec
-                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]
-
-                else    # refractory period is inactive
-                        recSignal[i1,i2,i3,i4] = zit[i1,i2,i3,i4] * wOut[i1,i2,i3,i4]
-                        vt[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * vt[i1,i2,i3,i4]) + sum(@view(recSignal[:,:,i3,i4]))
-
-                        # fires if membrane potential exceed threshold
-                        if vt[i1,i2,i3,i4] > vth[i1,i2,i3,i4]
-                                zt[i1,i2,i3,i4] = 1
-                                refractoryCounter[i1,i2,i3,i4] = refractoryDuration[i1,i2,i3,i4]
-                                firingCounter[i1,i2,i3,i4] += 1
-                                vt[i1,i2,i3,i4] = vRest[i1,i2,i3,i4]
-                        else
-                                zt[i1,i2,i3,i4] = 0
-                        end
-
-                        # compute phi, there is a difference from on formula
-                        phi[i1,i2,i3,i4] = (gammaPd[i1,i2,i3,i4] / vth[i1,i2,i3,i4]) * max(0, 1 - ((vt[i1,i2,i3,i4] - vth[i1,i2,i3,i4]) / vth[i1,i2,i3,i4]))
-
-                        # compute epsilonRec
-                        epsilonRec[i1,i2,i3,i4] = (alpha[i1,i2,i3,i4] * epsilonRec[i1,i2,i3,i4]) + zit[i1,i2,i3,i4]
-                end
-        end
-        return nothing
-end
-
-
-
-
-
-
-
-
 
 
 
diff --git a/src/learn.jl b/src/learn.jl
index 2bf68a1..e65ab72 100644
--- a/src/learn.jl
+++ b/src/learn.jl
@@ -9,6 +9,7 @@ using ..type, ..snnUtil
 #------------------------------------------------------------------------------------------------100
 
 function compute_paramsChange!(kfn::kfn_1, modelError, outputError)
+    modelError = reshape(modelError, (1,1,1,:)) # (1,1,1,batch)
     lifComputeParamsChange!(kfn.lif_phi, 
                             kfn.lif_epsilonRec, 
                             kfn.lif_eta,
@@ -18,7 +19,10 @@ function compute_paramsChange!(kfn::kfn_1, modelError, outputError)
                             kfn.on_wOut, 
                             kfn.lif_arrayProjection4d,
                             kfn.lif_error,
-                            modelError)
+                            modelError,
+                            
+                            kfn.inputSize,
+                            )
     
     alifComputeParamsChange!(kfn.alif_phi, 
                              kfn.alif_epsilonRec, 
@@ -30,7 +34,10 @@ function compute_paramsChange!(kfn::kfn_1, modelError, outputError)
                              kfn.alif_arrayProjection4d,
                              kfn.alif_error,
                              modelError,
-                             kfn.alif_beta)
+
+                             kfn.alif_epsilonRecA,
+                             kfn.alif_beta,
+                             )
     
     onComputeParamsChange!(kfn.on_phi,
                            kfn.on_epsilonRec,
@@ -38,7 +45,10 @@ function compute_paramsChange!(kfn::kfn_1, modelError, outputError)
                            kfn.on_eRec,
                            kfn.on_wOut,
                            kfn.on_wOutChange,
-                           outputError)
+                           kfn.on_arrayProjection4d,
+                           kfn.on_error,
+                           outputError,
+                           )
     # error("DEBUG ->  kfn compute_paramsChange!    $(Dates.now())")
 end
 
@@ -51,18 +61,28 @@ function lifComputeParamsChange!(   phi::CuArray,
                                     wOut::CuArray,
                                     arrayProjection4d::CuArray,
                                     nError::CuArray,
-                                    modelError::CuArray)
-
-    # Bₖⱼ in paper, sum() to get each neuron's total wOut weight
-    wOutSum = sum(wOut, dims=3) .* arrayProjection4d
-
+                                    modelError::CuArray,
+                                    
+                                    inputSize::CuArray,
+                                    )
+    # Bₖⱼ in paper, sum() to get each neuron's total wOut weight,
+    # use absolute because only magnitude is needed
+    wOutSum_all = reshape( abs.(sum(wOut, dims=3)), (1,1,:, size(wOut, 4)) )    # (1,1,allNeuron,batch)
+    
+    # get only each lif neuron's wOut, leaving out other neuron's wOut
+    startIndex = prod(inputSize) +1
+    stopIndex = startIndex + size(wRec, 3)  -1
+    wOutSum = @view(wOutSum_all[1,1, startIndex:stopIndex, :])  
+    wOutSum = reshape(wOutSum, (1, 1, size(wOutSum, 1), size(wOutSum, 2)))  # (1,1,n,batch)
+    
     # nError a.k.a. learning signal use dopamine concept, 
     # this neuron receive summed error signal (modelError)
-    nError .= (modelError .* arrayProjection4d) .* wOutSum
+    nError .= (modelError .* wOutSum) .* arrayProjection4d
     eRec .= phi .* epsilonRec
-    # GeneralUtils.isNotEqual(wRec, 0) is a subscribe filter use to filter out non-subscribed wRecChange
-    wRecChange .+= ((-1 .* eta) .* nError .* eRec .* sign.(wRec)) .* GeneralUtils.isNotEqual.(wRec, 0)
-    # error("DEBUG ->  lifComputeParamsChange!    $(Dates.now())")
+    wRecChange .+= ((-1 .* eta) .* nError .* eRec)
+
+    # reset epsilonRec
+    epsilonRec .= 0
 end
 
 function alifComputeParamsChange!(  phi::CuArray,
@@ -75,18 +95,29 @@ function alifComputeParamsChange!(  phi::CuArray,
                                     arrayProjection4d::CuArray,
                                     nError::CuArray,
                                     modelError::CuArray,
-                                    beta::CuArray)
 
-    # Bₖⱼ in paper, sum() to get each neuron's total wOut weight
-    wOutSum = sum(wOut, dims=3) .* arrayProjection4d
+                                    epsilonRecA::CuArray,
+                                    beta::CuArray
+                                    )
 
+    # Bₖⱼ in paper, sum() to get each neuron's total wOut weight,
+    # use absolute because only magnitude is needed
+    wOutSum_all = reshape( abs.(sum(wOut, dims=3)), (1,1,:, size(wOut, 4)) )    # (1,1,allNeuron,batch)
+    
+    # get only each lif neuron's wOut, leaving out other neuron's wOut
+    wOutSum = @view(wOutSum_all[1,1, end-size(wRec, 3)+1:end, :])  
+    wOutSum = reshape(wOutSum, (1, 1, size(wOutSum, 1), size(wOutSum, 2)))  # (1,1,n,batch)
+    
     # nError a.k.a. learning signal use dopamine concept, 
     # this neuron receive summed error signal (modelError)
-    nError .= (modelError .* arrayProjection4d) .* wOutSum
-    eRec .= (phi .* epsilonRec) .+ (phi .* epsilonRec .* beta)
+    nError .= (modelError .* wOutSum) .* arrayProjection4d
+    eRec .= phi .* (epsilonRec .- (beta .* epsilonRecA))    # use eq. 25
+    wRecChange .+= ((-1 .* eta) .* nError .* eRec)
+
+    # reset epsilonRec
+    epsilonRec .= 0
+    epsilonRecA .= 0
 
-    # GeneralUtils.isNotEqual(wRec, 0) is a subscribe filter use to filter out non-subscribed wRecChange
-    wRecChange .+= ((-1 .* eta) .* nError .* eRec .* sign.(wRec))  .* GeneralUtils.isNotEqual.(wRec, 0)
     # error("DEBUG ->  alifComputeParamsChange!    $(Dates.now())")
 end
 
@@ -96,15 +127,17 @@ function onComputeParamsChange!(phi::CuArray,
                                 eRec::CuArray,
                                 wOut::CuArray,
                                 wOutChange::CuArray,
+                                arrayProjection4d::CuArray,
+                                nError::CuArray,
                                 outputError::CuArray    # outputError is output neuron's error
                                 )   
 
-    # nError a.k.a. learning signal use dopamine concept, 
-    # this neuron receive summed error signal (modelError)
-    eRec .= (phi .* epsilonRec) .* reshape(outputError, (1, 1, :, size(epsilonRec, 4)))
-    
-    # GeneralUtils.isNotEqual(wRec, 0) is a subscribe filter use to filter out non-subscribed wRecChange
-    wOutChange .+= ((-1 .* eta) .* eRec .* sign.(wOut)) .* GeneralUtils.isNotEqual.(wOut, 0)
+    eRec .= phi .* epsilonRec
+    nError .= reshape(outputError, (1, 1, :, size(outputError, 2))) .* arrayProjection4d
+    wOutChange .+= ((-1 .* eta) .* nError .* eRec)
+
+    # reset epsilonRec
+    epsilonRec .= 0
 
     # error("DEBUG ->  onComputeParamsChange!    $(Dates.now())")
 end
@@ -224,20 +257,20 @@ end
 function lifLearn!(wRec,
                     wRecChange,
                     arrayProjection4d)
-
     # merge learning weight with average learning weight
-    wRec .+= (sum(wRecChange) ./ (size(wRec, 4))) .* arrayProjection4d
+    wRec .+= (sum(wRecChange, dims=4) ./ (size(wRec, 4))) .* arrayProjection4d
 
     #TODO synaptic strength 
 
     #TODO neuroplasticity
 
+    # error("DEBUG ->  lifLearn!    $(Dates.now())")
 end
 
 function alifLearn!(wRec,
                     wRecChange,
                     arrayProjection4d)
-    # merge learning weight
+    # merge learning weight with average learning weight
     wRec .+= (sum(wRecChange) ./ (size(wRec, 4))) .* arrayProjection4d
 
     #TODO synaptic strength 
@@ -249,7 +282,7 @@ end
 function onLearn!(wOut,
                     wOutChange,
                     arrayProjection4d)
-    # merge learning weight
+    # merge learning weight with average learning weight
     wOut .+= (sum(wOutChange) ./ (size(wOut, 4))) .* arrayProjection4d
 
     # adaptive wOut to help convergence using c_decay
diff --git a/src/type.jl b/src/type.jl
index 1f38371..ec1a905 100644
--- a/src/type.jl
+++ b/src/type.jl
@@ -21,6 +21,7 @@ Base.@kwdef mutable struct kfn_1 <: knowledgeFn
 
     timeStep::Union{AbstractArray, Nothing} = nothing
     learningStage::Union{AbstractArray, Nothing} = nothing  # 0 inference, 1 start, 2 during, 3 end learning
+    inputSize::Union{AbstractArray, Nothing} = nothing
     zit::Union{AbstractArray, Nothing} = nothing     # 3D activation matrix
     modelError::Union{AbstractArray, Nothing} = nothing     # store RSNN error
     outputError::Union{AbstractArray, Nothing} = nothing    # store output neurons error
@@ -50,6 +51,7 @@ Base.@kwdef mutable struct kfn_1 <: knowledgeFn
     lif_gammaPd::Union{AbstractArray, Nothing} = nothing
     lif_wRecChange::Union{AbstractArray, Nothing} = nothing
     lif_error::Union{AbstractArray, Nothing} = nothing
+    lif_subscription::Union{AbstractArray, Nothing} = nothing
 
     lif_firingCounter::Union{AbstractArray, Nothing} = nothing
     
@@ -85,6 +87,7 @@ Base.@kwdef mutable struct kfn_1 <: knowledgeFn
     alif_gammaPd::Union{AbstractArray, Nothing} = nothing
     alif_wRecChange::Union{AbstractArray, Nothing} = nothing
     alif_error::Union{AbstractArray, Nothing} = nothing
+    alif_subscription::Union{AbstractArray, Nothing} = nothing
 
     alif_firingCounter::Union{AbstractArray, Nothing} = nothing
     
@@ -137,6 +140,7 @@ Base.@kwdef mutable struct kfn_1 <: knowledgeFn
     on_gammaPd::Union{AbstractArray, Nothing} = nothing
     on_wOutChange::Union{AbstractArray, Nothing} = nothing
     on_error::Union{AbstractArray, Nothing} = nothing
+    on_subscription::Union{AbstractArray, Nothing} = nothing
 
     on_firingCounter::Union{AbstractArray, Nothing} = nothing
     
@@ -162,8 +166,8 @@ function kfn_1(params::Dict; device=cpu)
     # ---------------------------------------------------------------------------- #
     # row*col is a 2D matrix represent all RSNN activation
     row, col, batch = kfn.params[:inputPort][:signal][:numbers] # z-axis represent signal batch number
-    # row += kfn.params[:inputPort][:noise][:numbers][1]
     col += kfn.params[:inputPort][:noise][:numbers][2]
+    kfn.inputSize = [row, col]                       |> device 
     col += kfn.params[:computeNeuron][:lif][:numbers][2]
     col += kfn.params[:computeNeuron][:alif][:numbers][2]
 
@@ -208,6 +212,7 @@ function kfn_1(params::Dict; device=cpu)
     kfn.lif_gammaPd = (similar(kfn.lif_wRec) .= 0.3)                   |> device
     kfn.lif_wRecChange = (similar(kfn.lif_wRec) .= 0)                 |> device
     kfn.lif_error = (similar(kfn.lif_wRec) .= 0)               |> device
+    kfn.lif_subscription = (GeneralUtils.isNotEqual.(kfn.lif_wRec, 0)) |> device
 
     kfn.lif_firingCounter = (similar(kfn.lif_wRec) .= 0)               |> device
 
@@ -254,6 +259,7 @@ function kfn_1(params::Dict; device=cpu)
     kfn.alif_gammaPd = (similar(kfn.alif_wRec) .= 0.3)                   |> device
     kfn.alif_wRecChange = (similar(kfn.alif_wRec) .= 0)                 |> device
     kfn.alif_error = (similar(kfn.alif_wRec) .= 0)                 |> device
+    kfn.alif_subscription = (GeneralUtils.isNotEqual.(kfn.alif_wRec, 0))     |> device
 
     kfn.alif_firingCounter = (similar(kfn.alif_wRec) .= 0)               |> device
 
@@ -286,9 +292,13 @@ function kfn_1(params::Dict; device=cpu)
     # subscription
     w = zeros(row, col, n)
     synapticConnectionPercent = kfn.params[:outputPort][:params][:synapticConnectionPercent]
-    synapticConnection = Int(floor(row*col * synapticConnectionPercent/100))
-    for slice in eachslice(w, dims=3)
-        pool = shuffle!([1:row*col...])[1:synapticConnection]
+    subable = size(kfn.lif_wRec, 3) + size(kfn.alif_wRec, 3) # sub to lif, alif only
+    synapticConnection = Int(floor(subable * synapticConnectionPercent/100))
+    for slice in eachslice(w, dims=3)   # each slice is a neuron
+        startInd = row*col - subable + 1    # e.g. 100(row*col) - 50(subable) = 50 -> startInd = 51
+
+        # pool must contain only lif, alif neurons
+        pool = shuffle!([startInd:row*col...])[1:synapticConnection]
         for i in pool
             slice[i] = randn()/10   # assign weight to synaptic connection
         end
@@ -313,6 +323,7 @@ function kfn_1(params::Dict; device=cpu)
     kfn.on_gammaPd = (similar(kfn.on_wOut) .= 0.3)                   |> device
     kfn.on_wOutChange = (similar(kfn.on_wOut) .= 0)                 |> device
     kfn.on_error = (similar(kfn.on_wOut) .= 0)                     |> device
+    kfn.on_subscription = (GeneralUtils.isNotEqual.(kfn.on_wOut, 0)) |> device
 
     kfn.on_firingCounter = (similar(kfn.on_wOut) .= 0)               |> device