This commit is contained in:
2024-09-13 15:07:19 +07:00
commit c8684dea31
28 changed files with 9143 additions and 0 deletions

View File

@@ -0,0 +1,841 @@
module interface
export noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue, replaceMoreThan,
replaceLessThan, cartesianAssign!, sumAlongDim3, matMul_3Dto3D_manyTo1batch,
matMul_3Dto4D_batchwise
using Dates, Random
using JSON3, DataStructures, Distributions, Flux, CUDA
#------------------------------------------------------------------------------------------------100
noNegative!(a::AbstractVector) = replace!(x -> x < 0 ? 0 : x, a)
findNotZero(x::AbstractVector) = findall( (!iszero).(x) )
replaceMoreThan(i, target) = i > target ? target : i
replaceLessThan(i, target) = i < target ? target : i
precision(x::Array{<:Array}) = ( std(mean.(x)) / mean(mean.(x)) ) * 100
precision(x::Array) = std(x) / mean(x) * 100
replaceAt!(x::AbstractVector, ind::Number, value::Number) = x[ind] = value
notZero(x::AbstractVector) = (!iszero).(x)
Zero(x::AbstractVector) = iszero.(x)
isNan(x::AbstractVector) = isnan.(x)
isInf(x::Number) = abs(x) === Inf
isInf(x::AbstractVector) = isinf.(x)
isLess(x::AbstractVector, target::Number) = isless.(x, target)
isMore(x::Number, target::Number) = x > target
isMore(x::AbstractVector, target::Number) = isMore.(x, target)
absolute(x::AbstractVector) = abs.(x)
vecEleMul(x::AbstractVector, y::AbstractVector) = x .* y
vecEleMul(x::Number, y::AbstractVector) = x .* y
expDecay(initialValue::Number, decayFactor::Number, timePass::Number) =
initialValue * (1 - decayFactor)^timePass
mul!(x::AbstractVector, y::AbstractVector) = x .*= y
mul(x::AbstractVector, y::AbstractVector) = x .* y
ReLu(x::Number) = max(0, x)
updateVector!(x::AbstractVector, target::Number) = x .= target
updateVector!(x::AbstractVector, target::AbstractArray) = x .= target
function selectAdd!(x::AbstractVector, ind::AbstractVector, value::AbstractVector)
@. x = x + (ind * value)
end
""" findIndex(input::String, target::Char)
Find target index inside a collection.
Return 1) Bool array of matched target
2) CartesianIndex of every matched target
"""
function findIndex(input::String, target::Char)
match_position = []
for i in input
if i == target
append!(match_position, 1)
else
append!(match_position, 0)
end
end
match_index = findall(isequal.(match_position, 1))
return match_position, match_index
end
function findIndex(input::Array, target::Number)
match_position = isequal.(input, target)
match_index = findall(match_position)
return match_position, match_index
end
# function findIndex(input::Array, target::Array)
# match_position = isone.(zeros(length(input)))
# for i in target
# match_position = match_position + isequal.(input, i)
# end
# match_position = replaceMoreThan.(match_position, 1)
# match_index = findall(isone.(match_position)) # Findall donot work with Int64 vector [1, 0, 0, 1].
# # It only works with BitVector. isone() converts Int64 vector [1, 0, 0, 1] into
# # BitVector [1, 0, 0, 1]
# return match_position, match_index
# end
function findIndex(input::Array, target::Symbol)
match_position = isequal.(input, target)
match_index = findall(match_position)
return match_position, match_index
end
function findIndex(collection::Array{String}, target::String)
match_position = isequal.(collection, target)
match_index = findall(match_position)
return match_position, match_index
end
function findIndex(collection::Array{String}, target::Array{String})
match_position = nothing
match_index = nothing
for i in target
match_pos = isequal.(collection, i)
match_ind = findall(match_pos)
if match_position === nothing
match_position = match_pos
else
match_position = hcat(match_position, match_pos)
end
if match_index === nothing
match_index = match_ind
else
match_index = hcat(match_index, match_ind)
end
end
return match_position, match_index
end
function findIndex(collection::OrderedDict, target::Symbol)
collection_keys = keys(collection)
collection_keys_array = [i for i in collection_keys]
match_position = isequal.(collection_keys_array, target)
match_index = findall(match_position)
return match_position, match_index
end
function findMax(collection::AbstractVector)
maxValue, maxIndex = findmax(collection)
matchPosition = isequal.(collection, maxValue)
return maxValue, maxIndex, matchPosition
end
""" read_textfile_by_index(folder_path::String, read_file_number::Integer=1)
with multiple text file in a folder,
this function read x_th text file in a folder (filename is sorted by OS)
# Example
utils.read_textfile_by_index(cleaned_data_path, 2)
read 2nd txt file in a folder
"""
function read_textfile_by_index(folder_path::String, read_file_number::Integer=1)
if isdir(folder_path)
filenumber = length(readdir(folder_path))
if read_file_number > filenumber
error("you specified read_file_number = $read_file_number which is out
of range, the cleaned data folder has only $filenumber files")
return nothing, nothing, nothing
else
content = 0
# open each file in the directory and read
filename = readdir(folder_path, join=true, sort=false)[read_file_number]
f = open(filename)
content = readlines(f)
# content = read(f)
close(f)
end
return read_file_number, filename, content
else
error("ERROR no file or folder at $folder_path")
return nothing, nothing, nothing
end
end
#------------------------------------------------------------------------------------------------100
""" Array_to_JSON3_str(data::AbstractArray)
encode Array to JSON3 String
# Example
a = [1.23 4.7889; 9987.1 -123.07; -0.0027 -6.75]
json3_str = Array_to_JSON3_str(a)
json3_str = {"Array":[1.23,9987.1,-0.0027,4.7889,-123.07,-6.75],"size":[3,2]}
"""
function Array_to_JSON3_str(data::AbstractArray)
d = Dict("Array"=> data, "size"=>size(data))
json3_str = JSON3.write(d)
return json3_str
end
#------------------------------------------------------------------------------------------------100
""" JSON3_str_to_Array(json3_str::String)
decode JSON3 String to Array
# Example
json3_str = {"Array":[1.23,9987.1,-0.0027,4.7889,-123.07,-6.75],"size":[3,2]}
a = JSON3_str_to_Array(json3_str)
a = [1.23 4.7889; 9987.1 -123.07; -0.0027 -6.75]
"""
function JSON3_str_to_Array(json3_str::String)
d = JSON3.read(json3_str)
array = reshape(Array(d.Array), (d.size[1], d.size[2]))
return array
end
#------------------------------------------------------------------------------------------------100
""" Convert JSON3.read object to OrderedDict
# Example
dict = dictionary(["a"=>4, "b"=>6])
OrDict = OrderedDict(dict)
jsonString = JSON3.write(OrDict) # use jsonString to exchange. One can save it to file or send it thru pub/sub
jsonObject = JSON3.read(jsonString)
OrDict2 = JSON3read_to_OrDict(jsonObject) # example here
Adict2 = dictionary(OrDict2)
Andyferris's github https://github.com/andyferris/Dictionaries.jl
"""
function JSON3read_to_OrDict(x)
dict = OrderedDict()
for (k, v) in x
k = string(k)
dict[k] = v
end
return dict
end
#------------------------------------------------------------------------------------------------100
"""
print time of cpu executtion at the line inwhich this macro is used
"""
macro timeline(expr)
quote
print("line ", $(__source__.line), ": ")
@time $(esc(expr))
end
end
batchindex(batch_counter::Number, batch_size::Number; offset=0) =
(offset + (batch_counter-1) * batch_size + 1) : offset + (batch_counter * batch_size)
function flip_true_false(x::Bool)
if x == true
x = false
elseif x == false
x = true
else
error("undefined condition line $(@__LINE__)")
end
return x
end
function flip_true_false(x::Int)
if x == 1
x = 0
elseif x == 0
x = 1
else
throw("not define input of type $(typeof(x)) yet")
end
return x
end
"""
Return drawed index
# Example
drawed_index = randomWithProb([0.5, 0.2, 0.3])
"""
randomWithProb(probability::AbstractVector) = rand(Distributions.Categorical(probability)) # return drawed index
"""
Draw from choices according to its probability.
Probability range is 0.0 to 1.0 and all probability must summed up to 1
(may get probability from NNlib's softmax function)
# Example
draw = draw_choices([true, false, nothing], [0.5, 0.2, 0.3])
"""
function randomChoiceWithProb(choices::Array, probability::Array)
if length(choices) != length(probability)
error("random is not possible, choices array length != probability array length")
elseif sum(probability) != 1.0
error("probability does not sum to 1.0")
end
return choices[randomWithProb(probability)]
end
function randomChoiceOnTarget(target::Number, targetMatch::Number, choices::AbstractVector,
probability::AbstractVector)
if length(choices) != length(probability)
throw("random is not possible, choices array length != probability array length")
end
return target == targetMatch ? randomChoiceWithProb(choices, probability) : target
# dist = Distributions.Categorical(probability)
# draw_result = choices[rand(dist)]
end
function randomChoiceOnTarget(target::AbstractVector, choiceList::AbstractVector,
probability::AbstractVector)
return randomChoiceOnTarget.(target, 1, (choiceList,), (probability,))
end
function linearly_weighted_avg(a::Array)
total = 0.0
for (i, v) in enumerate(a)
total = total + (i * v)
end
return total / sum(a)
end
""" Convert String that is holded inside a variable to Symbol
# Example
x = "hello" # x is a variable holding String "hello" \n
y = variable_to_symbol(x) # y holds :hello
"""
function variable_str_to_symbol(variable)
semi = :($variable)
symbol = Symbol(semi)
return symbol
end
""" get useable type of specified fieldname inside a composite struct
# Example
julia> @Base.kwdef mutable struct some_struct
a::Union{Bool, Nothing} = nothing
b::Union{Float64, Nothing} = nothing
c::Union{Int64, AbstractFloat} = 3.5
d::Union{String, Nothing} = nothing
end
julia> a = some_struct()
julia> fieldname_useable_type(some_struct, :c) =result=> [Int64, Float64]
"""
function fieldname_useable_type(somestruct, fieldname::Symbol;
test_types=[2.0, 2, true, :h, "str", 'c', missing, nothing])::Vector{DataType}
new_instance = somestruct()
useable_type = []
for i in test_types
try
new_instance.:($fieldname) = i
type = typeof(new_instance.:($fieldname))
if type useable_type
push!(useable_type, type)
end
catch
end
end
return useable_type
end
function randomNoRepeat(drawOptions::Array, draw_number::Integer;
exclude_list::Union{AbstractArray,Nothing}=nothing)
draw_option = copy(drawOptions)
draw_option = isnothing(exclude_list) ? draw_option :
filter!(x -> x exclude_list, draw_option)
shuffle!(draw_option)
drawed_items = []
while length(drawed_items) < draw_number
push!(drawed_items, pop!(draw_option))
end
return drawed_items
end
""" using cron to schedule backup job by
1. sudo nano /etc/crontab <<< this is a system-wide cron file
2. to execute julia file @ 2.00am everyday add the following line at the buttom of the file
0 2 * * * root julia-1.7 /home/syncthing_backup_script.jl
Requirements using Dates
"""
function folderBackup(sourceFolderAbsolutePath::String, # absolute path to folder to be backuped
backupFolderAbsolutePath::String; # absolute path to folder used to store backup file
totalBackupFiles::Integer=7, # total backup file, the oldest will be deleted
containerName::Union{Array{String}, Nothing}=nothing) # container using source_folder
sep = (Sys.iswindows() ? "\\" : '/')
if sourceFolderAbsolutePath[end] == sep
sourceFolderAbsolutePath = sourceFolderAbsolutePath[1:end-1]
end
if backupFolderAbsolutePath[end] != sl
backupFolderAbsolutePath = backupFolderAbsolutePath * sep
end
if isdir(backupFolderAbsolutePath)
else
mkpath(backupFolderAbsolutePath)
end
# stop running docker container service
if containerName !== nothing
println("stop running services")
for i in containerName
try run(`docker stop $i`) catch; end
sleep(10) # wait for services to stop
end
end
# do backup
println("doing backup now")
timestamp = string(Dates.now())
name = split(sourceFolderAbsolutePath, sep)[end] * "--"
filename = name * timestamp * ".zip" # resulting compressed filename
run(`chmod -R a+rwx $sourceFolderAbsolutePath`)
# zip -r [destination+filename] [source folder to be zipped]
run(`zip -r $(backupFolderAbsolutePath * filename) $sourceFolderAbsolutePath`)
# check if total backup file is more than user specified, if yes, delete the oldest backup
backupFiles = readdir(backupFolderAbsolutePath)
while length(backupFiles) > totalBackupFiles
run(`rm $(backupFolderAbsolutePath * backupFiles[1])`)
backupFiles = readdir(backupFolderAbsolutePath)
end
# start docker services
if containerName !== nothing
println("start services")
for i in containerName
try run(`docker start $i`) catch; end
sleep(10) # wait for services to stop
end
end
end
function lowerclip!(data::AbstractVector, lowerbound::Number)
replace!(x -> x < lowerbound ? lowerbound : x, data)
end
function upperclip!(data::AbstractVector, upperbound::Number)
replace!(x -> x > upperbound ? upperbound : x, data)
end
function normalise(x::AbstractArray, mu, std)
ϵ = oftype(x[1], 1e-5)
μ = mu
# σ = std(x, dims=dims, mean=μ, corrected=false) # use this when Zygote#478 gets merged
σ = std
return (x .- μ) ./ (σ .+ ϵ)
end
function minMaxScaler(x::AbstractVector)
min = findmin(x)[1]
max = findmax(x)[1]
scaler(a::Number, min::Number, max::Number) = (a-min) / (max-min)
return scaler.(x, min, max)
end
""" a = [-1e200, -1e-200, 1e200, 1e-200] \n
result = vtclamp.(a, 1e-6, 1e6, -1e6, -1e-6)
"""
function customclamp(x::Number, poslo::Number, poshi::Number,
neglo::Number, neghi::Number)
signx = sign(x)
if signx == -1
if neghi < x < 0
return neghi
elseif x < neglo
return neglo
else
return x
end
elseif signx == +1
if poshi < x
return poshi
elseif 0 < x < poslo
return poslo
else
return x
end
end
end
function unitVec(x::AbstractVector)
y = (sum(x.^2))
return x./y
end
function replaceAt!(x::AbstractVector, ind::AbstractVector, value::Number)
for i in ind
x[i] = value
end
end
function signbitVec(x::AbstractVector)
sign = signbit.(x) * 1
signVec = replace(s -> s == 0 ? -1 : s, sign)
return signVec
end
function deleteall!(x::AbstractVector)
for i in 1:length(x)
deleteat!(x, 1)
end
end
""" Select specific range of vectors in a dict, return a new dict
# Example
dict = Dict(:a => [1:5...],
:b => [6:10...])
call -> selectRange(dict, 1:3)
return -> Dict{Any, Any} with 2 entries:
:a => [1, 2, 3]
:b => [6, 7, 8]
"""
function selectRange(d::Dict{Symbol, <:AbstractVector}, range)
newDict = Dict{Symbol, AbstractVector}()
for (k, v) in d
newDict[k] = v[range]
end
return newDict
end
""" Assign value to a given Dict by array of keys
# Example
d = Dict(
:a1=> Dict(:c=> 5),
:a2=> Dict(
:k=> 10,
:b=> Dict(
:s=> "target",
)
)
)
index = [:a2, :b, :s] \n
assignDict!(d, [:a2, :b, :s], "wow")
return 1 if no target key in a given dict.
"""
function assignDict!(dict::Dict, accessArray::Array{Symbol}, valueToAssign)
wd = nothing
for i in accessArray
println(i)
if i != accessArray[end]
if wd === nothing && haskey(dict, i)
wd = Ref(dict[i])
elseif wd.x !== nothing && haskey(wd.x, i)
wd = Ref(wd.x[i])
else
return 1 # error, no target key in a given dict.
end
else
wd.x[i] = valueToAssign
return 0
end
end
end
""" convert hour(0-23), minute(0-59) into julia time object
# Example
time
"""
function iTime(h::Integer, m::Integer)
if h == 0
h = 12
ampm = "am"
elseif 1 <= h <= 11
ampm = "am"
elseif h == 12
ampm = "pm"
elseif 13 <= h <= 23
h = h - 12
ampm = "pm"
else
error("hour out of range")
end
m = m < 10 ? "0$m" : m
t = "$h:$m$ampm"
return Time(t, "HH:MMp")
end
""" replace a number according to the limit
if value is lower than lowerbound return lowerbound replacement value
if value is more than upperbound return upperbound replacement value
# Example
limitvalue(4, (-5 => 0), (5 => 5))
"""
function limitvalue(v::Number, lowerbound::Pair, upperbound::Pair)
lwLimit, lwReplace = lowerbound
upLimit, upReplace = upperbound
if v < lwLimit
v = lwReplace
elseif v > upLimit
v = upReplace
else
end
return v
end
""" assign matrix b to matrix a according to matrix b's CartesianIndex
"""
cartesianAssign!(a::CuArray, b::CuArray) = @cuda cartesianAssign!(a, b)
function cartesianAssign!(a, b)
for (i, v) in enumerate(b)
a[CartesianIndices(b)[i].I...] = v
end
return nothing
end
function sumAlongDim3(a::Array)
totalDim = length(size(a))
if totalDim == 3
d1, d2, d3 = size(a)
r = zeros(1, 1, d3)
for i in 1:d3
view(r, 1, 1, i) .= sum(a[:, :, i])
end
elseif totalDim == 4
d1, d2, d3, d4 = size(a)
r = zeros(1, 1, d3, d4)
for j in 1:d4
for i in 1:d3
view(r, 1, 1, i, j) .= sum(a[:, :, i, j])
end
end
else
error("this condition is not define yet")
end
return r
end
""" ELEMENT-wise multiply of each slice of 3D input matrix ,a, to all slice of 3D another matrix ,b, and
concatenate at the 4th dimension.
Example
julia> input = rand(32, 32, 128) # batch at 3rd dim
julia> weight = rand(32, 32, 1024)
julia> r = matMul_3Dto3D_manyTo1batch(input, weight);
julia> size(r)
(32, 32, 1024, 128)
"""
function matMul_3Dto3D_manyTo1batch(a::Array, b::Array; resultStorage::Union{Array, Nothing}=nothing)
asize = [size(a)...]
bsize = [size(b)...]
if resultStorage === nothing
resultStorage = similar(a, eltype(b), bsize[1], bsize[2], bsize[3], asize[3])
end
c = [slice .* b for slice in eachslice(a, dims=3)]
resultStorage .= cat(c..., dims=4)
return resultStorage
end
# function matMul_3Dto3D_manyTo1batch(a::CuArray, b::CuArray; # XXX working code
# resultStorage::Union{CuArray, Nothing}=nothing, threads=256)
# asize = [size(a)...]
# bsize = [size(b)...]
# if resultStorage === nothing
# resultStorage = similar(a, eltype(b), bsize[1], bsize[2], bsize[3], asize[3]) |> gpu
# end
# CUDA.@sync begin
# @cuda threads=threads matMul_3Dto3D_manyTo1batch_gpu!(a, b, resultStorage)
# end
# return resultStorage
# end
# function matMul_3Dto3D_manyTo1batch_gpu!(a, b, resultStorage) # XXX working code
# _, _, _, p = size(resultStorage)
# index = threadIdx().x # this example only requires linear indexing, so just use `x`
# stride = blockDim().x
# for i in index:stride:p
# view(resultStorage, :, :, :, i) .= view(a, :, :, i) .* b
# end
# return nothing
# end
""" GPU kernel
"""
function matMul_3Dto3D_manyTo1batch_gpu!(a, b, resultStorage)
_, _, batch = size(a) # This kernel use 1 thread per batch
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
if i <= batch # guard against unused threads to accessing memory out of bound
# @cuprintln("thread $i")
view(resultStorage, :, :, :, i) .= view(a, :, :, i) .* b
end
return nothing
end
""" GPU version of batchMatEleMul
Example
julia> using Flux, CUDA
julia> device = Flux.CUDA.functional() ? gpu : cpu
julia> if device == gpu CUDA.device!(0) end
julia> input = rand(32, 32, 128) |> gpu; # 128-batches
julia> weight = rand(32, 32, 1024) |> gpu; # 1-batch
julia> r = matMul_3Dto3D_manyTo1batch(input, weight);
julia> size(r)
(32, 32, 1024, 128)
"""
function matMul_3Dto3D_manyTo1batch(a::CuArray, b::CuArray;
resultStorage::Union{CuArray, Nothing}=nothing)
asize = [size(a)...]
bsize = [size(b)...]
if resultStorage === nothing
resultStorage = similar(a, eltype(b), bsize[1], bsize[2], bsize[3], asize[3]) |> gpu
end
kernel = @cuda launch=false matMul_3Dto3D_manyTo1batch_gpu!(a, b, resultStorage)
config = launch_configuration(kernel.fun)
# threads to be launched. Since one can't launch exact thread number the kernel needs,
# one just launch threads more than this kernel needs then use a guard inside the kernel
# to prevent unused threads to access memory.
threads = min(1024, config.threads) # most NVIDIA gpu has 1024 threads per block
blocks = cld(asize[3], threads) # This kernel use 1 thread per batch
CUDA.@sync begin
kernel(a, b, resultStorage; threads, blocks)
end
return resultStorage
end
""" ELEMENT-wise multiply of each slice of 3D input matrix ,a, to all batch of another 4D matrix ,b, and
concatenate at the 4th dimension.
Example
julia>
julia> a = rand(2,2,3) # 3-batches
julia> b = rand(2,2,4,3) # 3-batches
julia> r = GeneralUtils.matMul_3Dto4D_batchwise(a, b);
julia> size(r)
(2, 2, 4, 3)
"""
function matMul_3Dto4D_batchwise(a::Array, b::Array; resultStorage::Union{Array, Nothing}=nothing)
asize = [size(a)...]
bsize = [size(b)...]
if asize[end] != bsize[end]
error("batch number of a and b must be equal")
end
if resultStorage === nothing
resultStorage = zeros(bsize[1], bsize[2], bsize[3], asize[3])
end
for i in 1:asize[3]
view(resultStorage, :, :, :, i) .= a[:, :, i] .* b[:, :, :, i]
end
return resultStorage
end
end # module