Files
GeneralUtils/src/interface.jl
2024-09-13 15:07:19 +07:00

1186 lines
35 KiB
Julia
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
module interface
export noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue, replaceMoreThan,
replaceLessThan, replaceBetween, cartesianAssign!, sumAlongDim3, matMul3Dto3DmanyTo1batch,
matMul_3Dto4D_batchwise, isNotEqual, linearToCartesian, vectorMax,
multiply_last, multiplyRandomElements, replaceElements, replaceElements!, isBetween,
isLess, allTrue, getStringBetweenCharacters, JSON3read_stringKey, mkDictPath!,
getDictPath, dataframeToCSV
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames, CSV
using ..util, ..communication
# ---------------------------------------------- 100 --------------------------------------------- #
"""
    noNegative!(a::AbstractVector)

Replace every negative element of `a` with zero, in place, and return `a`.
"""
function noNegative!(a::AbstractVector)
    return replace!(a) do x
        x < 0 ? 0 : x
    end
end

"""
    findNotZero(x::AbstractVector)

Return the indices of the non-zero elements of `x`.
"""
function findNotZero(x::AbstractVector)
    return findall(!iszero, x)
end

"""
    replaceMoreThan(x, target, replaceValue)
    replaceMoreThan(x, target, a, b)

Three-argument form: return `replaceValue` when `x > target`, otherwise `x`.
Four-argument form: return `a` when `x > target`, otherwise `b`.
"""
function replaceMoreThan(x, target, replaceValue)
    if x > target
        return replaceValue
    end
    return x
end

function replaceMoreThan(x, target, a, b)
    if x > target
        return a
    end
    return b
end

"""
    replaceLessThan(x, target, replaceValue)
    replaceLessThan(x, target, a, b)

Three-argument form: return `replaceValue` when `x < target`, otherwise `x`.
Four-argument form: return `a` when `x < target`, otherwise `b`.
"""
function replaceLessThan(x, target, replaceValue)
    if x < target
        return replaceValue
    end
    return x
end

function replaceLessThan(x, target, a, b)
    if x < target
        return a
    end
    return b
end

"""
    replaceBetween(x, lowerbound, upperbound, replaceValue)

Return `replaceValue` when `x` lies strictly between the two bounds, otherwise `x`.
"""
function replaceBetween(x, lowerbound, upperbound, replaceValue)
    if lowerbound < x < upperbound
        return replaceValue
    end
    return x
end
"""
    precision(x::Array{<:Array})
    precision(x::Array)

Relative spread in percent: standard deviation divided by mean, times 100.
For an array of arrays, the statistic is computed over the mean of each inner array.
"""
function precision(x::Array{<:Array})
    groupMeans = mean.(x)
    return (std(groupMeans) / mean(groupMeans)) * 100
end

function precision(x::Array)
    spread = std(x)
    return spread / mean(x) * 100
end
"""
    replaceAt!(x::AbstractVector, ind::Number, value::Number)

Store `value` at position `ind` of `x` and return `value`.
"""
function replaceAt!(x::AbstractVector, ind::Number, value::Number)
    setindex!(x, value, ind)
    return value
end

"""
    notZero(x::AbstractVector)

Element-wise mask that is `true` where `x` is non-zero.
"""
function notZero(x::AbstractVector)
    return broadcast(!iszero, x)
end

"""
    Zero(x::AbstractVector)

Element-wise mask that is `true` where `x` is zero.
"""
function Zero(x::AbstractVector)
    return broadcast(iszero, x)
end

"""
    isNan(x::AbstractVector)

Element-wise mask that is `true` where `x` is NaN.
"""
function isNan(x::AbstractVector)
    return broadcast(isnan, x)
end
"""
    isInf(x::Number)
    isInf(x::AbstractVector)

Return `true` when `x` is positive or negative infinity; the vector method answers
element-wise.

The scalar method delegates to `isinf`, so infinities of every float type
(e.g. `Inf32`) are recognised, not only the `Float64` value `Inf`.
"""
isInf(x::Number) = isinf(x)
isInf(x::AbstractVector) = isinf.(x)
"""
    isNotEqual(x::Number, target::Number)

Return `true` when `x` and `target` are not equal in the sense of `isequal`.
"""
function isNotEqual(x::Number, target::Number)
    return !isequal(x, target)
end

"""
    isBetween(x, lowerlimit, upperlimit)

Return `true` when `x` lies strictly between the two limits.
"""
function isBetween(x, lowerlimit, upperlimit)
    if lowerlimit < x < upperlimit
        return true
    end
    return false
end

"""
    absolute(x::AbstractVector)

Element-wise absolute value.
"""
function absolute(x::AbstractVector)
    return broadcast(abs, x)
end

"""
    vecEleMul(x, y::AbstractVector)

Element-wise product of `x` (a vector or a scalar) with the vector `y`.
"""
function vecEleMul(x::AbstractVector, y::AbstractVector)
    return broadcast(*, x, y)
end

function vecEleMul(x::Number, y::AbstractVector)
    return broadcast(*, x, y)
end

"""
    expDecay(initialValue::Number, decayFactor::Number, timePass::Number)

Value remaining after `timePass` steps when a fraction `decayFactor` is lost per step.
"""
function expDecay(initialValue::Number, decayFactor::Number, timePass::Number)
    keptPerStep = 1 - decayFactor
    return initialValue * keptPerStep^timePass
end

"""
    mul!(x::AbstractVector, y::AbstractVector)

Multiply `x` element-wise by `y` in place and return `x`.
"""
function mul!(x::AbstractVector, y::AbstractVector)
    x .= x .* y
    return x
end

"""
    mul(x::AbstractVector, y::AbstractVector)

Element-wise product of two vectors as a new array.
"""
function mul(x::AbstractVector, y::AbstractVector)
    return broadcast(*, x, y)
end
"""
    allTrue(args...)

Return `false` when any argument equals `false`, otherwise `true`
(including when called with no arguments).
"""
allTrue(args...) = (false in args) ? false : true
"""
    ReLu(x::Number)

Rectified linear unit: the larger of `0` and `x`.
"""
function ReLu(x::Number)
    return max(0, x)
end

"""
    updateVector!(x::AbstractVector, target)

Overwrite the contents of `x` in place and return `x`. A scalar `target` fills every
slot; an array `target` is broadcast into `x`.
"""
function updateVector!(x::AbstractVector, target::Number)
    return fill!(x, target)
end

function updateVector!(x::AbstractVector, target::AbstractArray)
    return broadcast!(identity, x, target)
end
"""
    selectAdd!(x::AbstractVector, ind::AbstractVector, value::AbstractVector)

Add `ind .* value` to `x` in place and return `x`. With a 0/1 vector as `ind`, only the
positions marked with 1 receive the corresponding `value`.
"""
function selectAdd!(x::AbstractVector, ind::AbstractVector, value::AbstractVector)
    x .+= ind .* value
    return x
end
"""
    findIndex(text::String, target::Char)

Locate every occurrence of `target` in `text`.

# Arguments
- `text`: the text to search
- `target`: the character to look for

# Return
A tuple `(mask, positions)`: a Bool mask with one entry per character of `text`, and
the character positions (1-based, counted in characters) where `target` occurs.

# Example
```jldoctest
julia> using GeneralUtils
julia> text = "Hello World!"
julia> findIndex(text, 'l')
(Bool[0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0], [3, 4, 10])
```
"""
function findIndex(text::String, target::Char)
    characters = collect(text)
    mask = broadcast(isequal, characters, target)
    positions = findall(mask)
    return mask, positions
end
"""
    findIndex(input::Array, target::Number)

Return `(mask, indices)`: the element-wise `isequal` mask of `input` against `target`
and the indices where the mask is `true`.
"""
function findIndex(input::Array, target::Number)
    mask = broadcast(isequal, input, target)
    return mask, findall(mask)
end
# function findIndex(input::Array, target::Array)
# match_position = isone.(zeros(length(input)))
# for i in target
# match_position = match_position + isequal.(input, i)
# end
# match_position = replaceMoreThan.(match_position, 1)
# match_index = findall(isone.(match_position)) # Findall donot work with Int64 vector [1, 0, 0, 1].
# # It only works with BitVector. isone() converts Int64 vector [1, 0, 0, 1] into
# # BitVector [1, 0, 0, 1]
# return match_position, match_index
# end
"""
    findIndex(input::Array, target::Symbol)

Return `(mask, indices)`: the element-wise `isequal` mask of `input` against the symbol
`target` and the indices where the mask is `true`.
"""
function findIndex(input::Array, target::Symbol)
    mask = broadcast(isequal, input, target)
    return mask, findall(mask)
end
"""
    findIndex(collection::Array{String}, target::String)

Return `(mask, indices)`: which entries of `collection` equal `target`, as a Bool mask
and as a list of indices.
"""
function findIndex(collection::Array{String}, target::String)
    mask = broadcast(isequal, collection, target)
    return mask, findall(mask)
end
"""
    findIndex(collection::Array{String}, target::Array{String})

Search `collection` for every string in `target`.

Returns `(positions, indices)`. For a single target these are the plain mask and index
vector; for several targets the per-target results are concatenated column-wise with
`hcat`. For an empty `target` both are `nothing`.

Because the index vectors are joined with `hcat`, all targets must match the same
number of entries; otherwise `hcat` throws.
"""
function findIndex(collection::Array{String}, target::Array{String})
    positions = nothing
    indices = nothing
    for wanted in target
        hit = broadcast(isequal, collection, wanted)
        hitIndex = findall(hit)
        # the first target seeds the accumulators; later ones are appended as columns
        positions = isnothing(positions) ? hit : hcat(positions, hit)
        indices = isnothing(indices) ? hitIndex : hcat(indices, hitIndex)
    end
    return positions, indices
end
"""
    findIndex(collection::OrderedDict, target::Symbol)

Search the keys of `collection` for `target`. Returns `(mask, indices)` over the keys
in their iteration order.
"""
function findIndex(collection::OrderedDict, target::Symbol)
    keyList = collect(keys(collection))
    mask = broadcast(isequal, keyList, target)
    return mask, findall(mask)
end
"""
    findMax(collection::AbstractVector)

Return `(maxValue, maxIndex, mask)`: the maximum and its index as reported by
`findmax`, plus a Bool mask marking every element equal to the maximum.
"""
function findMax(collection::AbstractVector)
    maxValue, maxIndex = findmax(collection)
    return maxValue, maxIndex, broadcast(isequal, collection, maxValue)
end
"""
    read_textfile_by_index(folder_path::String, read_file_number::Integer=1)

Read the `read_file_number`-th entry of `folder_path` as a text file.

The directory is listed with `readdir(...; sort=false)`, so the position of a file is
whatever order the listing returns, not necessarily alphabetical.

# Return
`(read_file_number, filename, content)` where `filename` is the joined path of the
file and `content` is a vector with one string per line.

# Throws
`ErrorException` when `folder_path` is not a directory or when `read_file_number` is
outside `1:number_of_entries`.

# Example
    utils.read_textfile_by_index(cleaned_data_path, 2)   # read 2nd file in the folder
"""
function read_textfile_by_index(folder_path::String, read_file_number::Integer=1)
    isdir(folder_path) || error("ERROR no file or folder at $folder_path")

    # list once, and use the same listing for the count and for the lookup
    entries = readdir(folder_path; join=true, sort=false)
    filenumber = length(entries)
    if !(1 <= read_file_number <= filenumber)
        error("you specified read_file_number = $read_file_number which is out " *
              "of range, the cleaned data folder has only $filenumber files")
    end

    filename = entries[read_file_number]
    # readlines(path) opens and closes the file itself, even if reading throws
    content = readlines(filename)
    return read_file_number, filename, content
end
# ---------------------------------------------- 100 --------------------------------------------- #
"""
    Array_to_JSON3_str(data::AbstractArray)

Encode an array as a JSON string holding two entries: `"Array"` (the data) and
`"size"` (the result of `size(data)`).

# Example
    a = [1.23 4.7889; 9987.1 -123.07; -0.0027 -6.75]
    json3_str = Array_to_JSON3_str(a)
    # per the original author's example:
    # {"Array":[1.23,9987.1,-0.0027,4.7889,-123.07,-6.75],"size":[3,2]}
"""
function Array_to_JSON3_str(data::AbstractArray)
    payload = Dict("Array" => data, "size" => size(data))
    return JSON3.write(payload)
end
# ---------------------------------------------- 100 --------------------------------------------- #
"""
    JSON3_str_to_Array(json3_str::String)

Decode a JSON string with `"Array"` and `"size"` entries back into a matrix. Only the
first two entries of `"size"` are used, so the result is always two-dimensional.

# Example
    json3_str = {"Array":[1.23,9987.1,-0.0027,4.7889,-123.07,-6.75],"size":[3,2]}
    a = JSON3_str_to_Array(json3_str)
    a = [1.23 4.7889; 9987.1 -123.07; -0.0027 -6.75]
"""
function JSON3_str_to_Array(json3_str::String)
    parsed = JSON3.read(json3_str)
    nrows, ncols = parsed.size[1], parsed.size[2]
    return reshape(Array(parsed.Array), (nrows, ncols))
end
# ---------------------------------------------- 100 --------------------------------------------- #
"""
    JSON3read_to_OrDict(x)

Copy the key/value pairs of `x` (e.g. the object returned by `JSON3.read`) into a new
untyped `OrderedDict`, converting every key to a `String`.

# Example
    dict = dictionary(["a"=>4, "b"=>6])
    OrDict = OrderedDict(dict)
    jsonString = JSON3.write(OrDict)           # exchange format: save to file or publish
    jsonObject = JSON3.read(jsonString)
    OrDict2 = JSON3read_to_OrDict(jsonObject)  # this function
    Adict2 = dictionary(OrDict2)

`dictionary` comes from Dictionaries.jl: https://github.com/andyferris/Dictionaries.jl
"""
function JSON3read_to_OrDict(x)
    result = OrderedDict()
    for entry in x
        key, value = entry
        result[string(key)] = value
    end
    return result
end
#------------------------------------------------------------------------------------------------100
"""
    @timeline expr

Print the source line number at which the macro is used, then evaluate `expr` under
`@time` so its execution time is reported next to that line number.
"""
macro timeline(expr)
quote
# __source__.line is the line of the macro call site, captured at expansion time
print("line ", $(__source__.line), ": ")
# esc() makes the expression evaluate in the caller's scope
@time $(esc(expr))
end
end
"""
    batchindex(batch_counter::Number, batch_size::Number; offset=0)

Index range covered by batch number `batch_counter` (1-based) when every batch holds
`batch_size` items, shifted by `offset`.
"""
function batchindex(batch_counter::Number, batch_size::Number; offset=0)
    firstIndex = offset + (batch_counter - 1) * batch_size + 1
    lastIndex = offset + (batch_counter * batch_size)
    return firstIndex:lastIndex
end
"""
    flip_true_false(x::Bool)

Return the logical negation of `x`.
"""
function flip_true_false(x::Bool)
    # a Bool is either true or false, so negation covers every case
    return !x
end
"""
    flip_true_false(x::Int)

Swap the integer flags `1` and `0`. Any other value throws the message string.
"""
function flip_true_false(x::Int)
    x == 1 && return 0
    x == 0 && return 1
    throw("not define input of type $(typeof(x)) yet")
end
"""
    randomWithProb(probability::AbstractVector)

Draw one index at random from a categorical distribution built from `probability`,
and return the drawn index.

# Example
    drawed_index = randomWithProb([0.5, 0.2, 0.3])
"""
function randomWithProb(probability::AbstractVector)
    distribution = Distributions.Categorical(probability)
    return rand(distribution)
end
"""
    randomChoiceWithProb(choices::Array, probability::Array)

Draw one element of `choices` at random, where `probability[i]` is the chance of
drawing `choices[i]`.

Probabilities range from 0.0 to 1.0 and must sum to 1 (they may come from a softmax).
The sum is checked with `isapprox`, so ordinary floating-point rounding in the
probabilities does not trigger a spurious error.

# Throws
`ErrorException` when the two arrays differ in length or the probabilities do not sum
to (approximately) 1.

# Example
    draw = randomChoiceWithProb([true, false, nothing], [0.5, 0.2, 0.3])
"""
function randomChoiceWithProb(choices::Array, probability::Array)
    if length(choices) != length(probability)
        error("random is not possible, choices array length != probability array length")
    end
    total = sum(probability)
    # tolerance-based check: float sums such as 0.9999999999999999 are still valid
    if !isapprox(total, one(total))
        error("probability does not sum to 1.0")
    end
    return choices[randomWithProb(probability)]
end
"""
    randomChoiceOnTarget(target::Number, targetMatch::Number, choices, probability)

When `target == targetMatch`, return a random draw from `choices` weighted by
`probability` (via `randomChoiceWithProb`); otherwise return `target` unchanged.
Throws the message string when `choices` and `probability` differ in length.
"""
function randomChoiceOnTarget(target::Number, targetMatch::Number, choices::AbstractVector,
        probability::AbstractVector)
    length(choices) == length(probability) ||
        throw("random is not possible, choices array length != probability array length")
    if target == targetMatch
        return randomChoiceWithProb(choices, probability)
    end
    return target
end
"""
    randomChoiceOnTarget(target::AbstractVector, choiceList, probability)

Apply the scalar `randomChoiceOnTarget` to every element of `target` with a match
value of `1`: elements equal to 1 are replaced by a random draw from `choiceList`,
all others are kept.
"""
function randomChoiceOnTarget(target::AbstractVector, choiceList::AbstractVector,
        probability::AbstractVector)
    # Ref(...) keeps the two vectors from being iterated by the broadcast
    return broadcast(randomChoiceOnTarget, target, 1, Ref(choiceList), Ref(probability))
end
"""
    linearly_weighted_avg(a::Array)

Weighted average of the positions `1, 2, ...` of `a`, using the elements of `a` as
weights: `sum(i * a[i]) / sum(a)`.
"""
function linearly_weighted_avg(a::Array)
    weighted = foldl(enumerate(a); init=0.0) do acc, (position, value)
        acc + (position * value)
    end
    return weighted / sum(a)
end
"""
    variable_str_to_symbol(variable)

Convert the value held by `variable` (typically a String) into a `Symbol`.

# Example
    x = "hello"                     # x is a variable holding the String "hello"
    y = variable_str_to_symbol(x)   # y holds :hello
"""
function variable_str_to_symbol(variable)
    return Symbol(variable)
end
"""
    fieldname_useable_type(somestruct, fieldname::Symbol; test_types=[...])

Find out which value types a field of a mutable struct accepts, by trial assignment.

A fresh instance is created with `somestruct()`, each probe value in `test_types` is
assigned to `fieldname`, and the type actually stored is recorded. Probe values the
field rejects are skipped. Each stored type is listed once, in the order found.

# Arguments
- `somestruct`: a type (or any callable) that builds a mutable instance with no arguments
- `fieldname`: the field to probe
- `test_types`: probe values to try

# Return
`Vector{DataType}` of the distinct types stored in the field.

# Example
    julia> @Base.kwdef mutable struct some_struct
               a::Union{Bool, Nothing} = nothing
               b::Union{Float64, Nothing} = nothing
               c::Union{Int64, AbstractFloat} = 3.5
               d::Union{String, Nothing} = nothing
           end
    julia> fieldname_useable_type(some_struct, :c)   # lists the types field c accepts
"""
function fieldname_useable_type(somestruct, fieldname::Symbol;
        test_types=[2.0, 2, true, :h, "str", 'c', missing, nothing])::Vector{DataType}
    new_instance = somestruct()
    useable_type = DataType[]
    for candidate in test_types
        try
            setproperty!(new_instance, fieldname, candidate)
            type = typeof(getproperty(new_instance, fieldname))
            if !(type in useable_type)
                push!(useable_type, type)
            end
        catch
            # the field rejected this probe value: that is the answer, not a failure
        end
    end
    return useable_type
end
"""
    randomNoRepeat(drawOptions::Array, draw_number::Integer; exclude_list=nothing)

Draw `draw_number` distinct items at random from `drawOptions`, never picking an item
that appears in `exclude_list`. `drawOptions` itself is not modified.

# Return
A `Vector{Any}` with the drawn items.

# Throws
`ArgumentError` when fewer than `draw_number` items remain after exclusion.
"""
function randomNoRepeat(drawOptions::Array, draw_number::Integer;
        exclude_list::Union{AbstractArray,Nothing}=nothing)
    # work on a private flat copy so the caller's array is untouched
    draw_option = copy(vec(drawOptions))
    if !isnothing(exclude_list)
        filter!(x -> !(x in exclude_list), draw_option)
    end
    if draw_number > length(draw_option)
        throw(ArgumentError("cannot draw $draw_number items, only " *
                            "$(length(draw_option)) options are available"))
    end
    shuffle!(draw_option)
    drawed_items = []
    while length(drawed_items) < draw_number
        push!(drawed_items, pop!(draw_option))
    end
    return drawed_items
end
"""
    folderBackup(sourceFolderAbsolutePath, backupFolderAbsolutePath;
                 totalBackupFiles=7, containerName=nothing)

Zip a folder into a timestamped archive inside a backup folder and prune old archives.

Steps, in order:
1. optionally stop the docker containers named in `containerName`
2. `chmod -R a+rwx` the source folder and `zip -r` it to `<folder name>--<timestamp>.zip`
3. while the backup folder holds more than `totalBackupFiles` entries, delete the first
   entry returned by `readdir`
4. optionally start the containers again

# Arguments
- `sourceFolderAbsolutePath`: absolute path of the folder to back up
- `backupFolderAbsolutePath`: absolute path of the folder that stores the archives
  (created when missing)
- `totalBackupFiles`: number of entries to keep in the backup folder
- `containerName`: docker containers that use the source folder, or `nothing`

Relies on the external commands `chmod`, `zip` and (when containers are given) `docker`.

Scheduling with cron:
1. `sudo nano /etc/crontab` (the system-wide cron file)
2. to run a julia file at 2.00am every day, add at the bottom of the file
   `0 2 * * * root julia-1.7 /home/syncthing_backup_script.jl`

Requires `using Dates`.
"""
function folderBackup(sourceFolderAbsolutePath::String, # absolute path to folder to be backed up
        backupFolderAbsolutePath::String; # absolute path to folder used to store backup file
        totalBackupFiles::Integer=7, # total backup file, the oldest will be deleted
        containerName::Union{Array{String}, Nothing}=nothing) # container using source_folder
    # a Char on every platform, so the comparisons below behave the same everywhere
    sep = Sys.iswindows() ? '\\' : '/'

    # source path without trailing separator, backup path with one
    if endswith(sourceFolderAbsolutePath, sep)
        sourceFolderAbsolutePath = String(chop(sourceFolderAbsolutePath))
    end
    if !endswith(backupFolderAbsolutePath, sep)
        backupFolderAbsolutePath = backupFolderAbsolutePath * sep
    end
    mkpath(backupFolderAbsolutePath) # no-op when the folder already exists

    # stop running docker container service
    if containerName !== nothing
        println("stop running services")
        for i in containerName
            try
                run(`docker stop $i`)
            catch err
                # best effort: a container that cannot be stopped must not abort the backup
                @warn "could not stop container" container = i exception = err
            end
            sleep(10) # wait for services to stop
        end
    end

    # do backup
    println("doing backup now")
    timestamp = string(Dates.now())
    name = split(sourceFolderAbsolutePath, sep)[end] * "--"
    filename = name * timestamp * ".zip" # resulting compressed filename
    run(`chmod -R a+rwx $sourceFolderAbsolutePath`)
    # zip -r [destination+filename] [source folder to be zipped]
    run(`zip -r $(backupFolderAbsolutePath * filename) $sourceFolderAbsolutePath`)

    # check if total backup file is more than user specified, if yes, delete the oldest backup
    backupFiles = readdir(backupFolderAbsolutePath)
    while !isempty(backupFiles) && length(backupFiles) > totalBackupFiles
        # readdir sorts by name; with the timestamp in the name the first entry is
        # presumably the oldest archive of this folder
        rm(backupFolderAbsolutePath * backupFiles[1])
        backupFiles = readdir(backupFolderAbsolutePath)
    end

    # start docker services
    if containerName !== nothing
        println("start services")
        for i in containerName
            try
                run(`docker start $i`)
            catch err
                @warn "could not start container" container = i exception = err
            end
            sleep(10) # wait for services to start
        end
    end
end
"""
    lowerclip!(data::AbstractVector, lowerbound::Number)

Raise every element of `data` that is below `lowerbound` up to `lowerbound`, in place.
Returns `data`.
"""
function lowerclip!(data::AbstractVector, lowerbound::Number)
    return replace!(data) do x
        x < lowerbound ? lowerbound : x
    end
end
"""
    upperclip!(data::AbstractVector, upperbound::Number)

Lower every element of `data` that is above `upperbound` down to `upperbound`, in
place. Returns `data`.
"""
function upperclip!(data::AbstractVector, upperbound::Number)
    return replace!(data) do x
        x > upperbound ? upperbound : x
    end
end
"""
    normalise(x::AbstractArray, mu, std)

Standardise `x` with a given mean `mu` and standard deviation `std`:
`(x - mu) / (std + 1e-5)`. The small constant, converted to the type of `x[1]`,
guards against division by zero.
"""
function normalise(x::AbstractArray, mu, std)
    epsilon = oftype(x[1], 1e-5)
    centered = x .- mu
    scale = std .+ epsilon
    return centered ./ scale
end
"""
    minMaxScaler(x::AbstractVector)

Rescale `x` linearly so that its smallest element maps to 0 and its largest to 1.
"""
function minMaxScaler(x::AbstractVector)
    lowest, highest = extrema(x)
    return (x .- lowest) ./ (highest - lowest)
end
"""
    customclamp(x::Number, poslo::Number, poshi::Number, neglo::Number, neghi::Number)

Clamp the magnitude of `x` while keeping its sign.

- positive `x` is limited to the interval `[poslo, poshi]`
- negative `x` is limited to the interval `[neglo, neghi]`
- zero (and NaN) is returned unchanged

# Example
    a = [-1e200, -1e-200, 1e200, 1e-200]
    result = customclamp.(a, 1e-6, 1e6, -1e6, -1e-6)
    # result == [-1e6, -1e-6, 1e6, 1e-6]
"""
function customclamp(x::Number, poslo::Number, poshi::Number,
        neglo::Number, neghi::Number)
    if x < 0
        neghi < x && return neghi   # negative but too close to zero
        x < neglo && return neglo   # negative and too large in magnitude
    elseif x > 0
        poshi < x && return poshi   # positive and too large
        x < poslo && return poslo   # positive but too close to zero
    end
    # in range, zero, or NaN: hand the value back instead of falling off the end
    return x
end
"""
    unitVec(x::AbstractVector)

Scale `x` to unit Euclidean length: every element is divided by `sqrt(sum(x .^ 2))`.

An all-zero vector has no direction; the division by zero then yields NaN entries.
"""
function unitVec(x::AbstractVector)
    magnitude = sqrt(sum(abs2, x))
    return x ./ magnitude
end
"""
    replaceAt!(x::AbstractVector, ind::AbstractVector, value::Number)

Store `value` at every position of `x` listed in `ind`. Returns `nothing`.
"""
function replaceAt!(x::AbstractVector, ind::AbstractVector, value::Number)
    foreach(ind) do position
        x[position] = value
    end
    return nothing
end
"""
    signbitVec(x::AbstractVector)

Map the sign bit of every element to an integer: `1` where the sign bit is set
(negative values), `-1` where it is not.
"""
function signbitVec(x::AbstractVector)
    return [signbit(element) ? 1 : -1 for element in x]
end
"""
    deleteall!(x::AbstractVector)

Remove every element of `x`, leaving it empty. Returns `nothing`.
"""
function deleteall!(x::AbstractVector)
    empty!(x)
    return nothing
end
"""
    selectRange(d::Dict{Symbol, <:AbstractVector}, range)

Build a new dict with the same keys as `d` in which every vector is cut down to the
elements selected by `range`.

# Example
    dict = Dict(:a => [1:5...],
                :b => [6:10...])
    selectRange(dict, 1:3)
    # Dict{Symbol, AbstractVector} with 2 entries:
    #   :a => [1, 2, 3]
    #   :b => [6, 7, 8]
"""
function selectRange(d::Dict{Symbol, <:AbstractVector}, range)
    return Dict{Symbol, AbstractVector}(key => vector[range] for (key, vector) in d)
end
"""
    assignDict!(dict::Dict, accessArray::Array{Symbol}, valueToAssign)

Assign a value inside a nested dict, addressed by a list of keys.

All keys except the last are followed into the nested dicts and must already exist.
The last key is assigned in the innermost dict, whether or not it existed before.

# Return
- `0` on success
- `1` when the path cannot be followed (a key on the way is missing, an intermediate
  value is not a dict, or `accessArray` is empty); nothing is modified in that case

# Example
    d = Dict(
        :a1 => Dict(:c => 5),
        :a2 => Dict(
            :k => 10,
            :b => Dict(
                :s => "target",
            )
        )
    )
    assignDict!(d, [:a2, :b, :s], "wow")   # returns 0, d[:a2][:b][:s] == "wow"
"""
function assignDict!(dict::Dict, accessArray::Array{Symbol}, valueToAssign)
    isempty(accessArray) && return 1

    # walk down to the dict that owns the final key
    current = dict
    for key in accessArray[1:end-1]
        (current isa AbstractDict && haskey(current, key)) || return 1
        current = current[key]
    end

    current isa AbstractDict || return 1
    current[accessArray[end]] = valueToAssign
    return 0
end
"""
    iTime(h::Integer, m::Integer)

Build a `Dates.Time` from an hour in `0:23` and a minute in `0:59`.

# Throws
- `ErrorException("hour out of range")` when `h` is outside `0:23`
- the error raised by the `Time` constructor when `m` is not a valid minute

# Example
    iTime(13, 5)   # 13:05:00
"""
function iTime(h::Integer, m::Integer)
    0 <= h <= 23 || error("hour out of range")
    # construct directly instead of formatting an am/pm string and parsing it back
    return Time(h, m)
end
"""
    limitvalue(v::Number, lowerbound::Pair, upperbound::Pair)

Replace a number that falls outside a pair of limits. Each bound is written as
`limit => replacement`:

- when `v` is lower than the lower limit, return the lower replacement
- when `v` is higher than the upper limit, return the upper replacement
- otherwise return `v`

# Example
    limitvalue(4, (-5 => 0), (5 => 5))   # 4
"""
function limitvalue(v::Number, lowerbound::Pair, upperbound::Pair)
    v < first(lowerbound) && return last(lowerbound)
    v > first(upperbound) && return last(upperbound)
    return v
end
"""
    cartesianAssign!(a, b)

Copy every element of `b` into `a` at the same cartesian index, so `b` overwrites the
matching corner of `a`.

# Arguments
- `a`: target array, modified in place
- `b`: source array

# Return
`nothing`; the result is the modified `a`.
"""
function cartesianAssign!(a, b)
    for position in CartesianIndices(b)
        a[position] = b[position]
    end
    return nothing
end
"""
    sumAlongDim3(a::Array)

Sum each 2-D slice `a[:, :, i]` (or `a[:, :, i, j]` for 4-D input) to one number.

The result is a Float64 array of size `(1, 1, d3)` for 3-D input and `(1, 1, d3, d4)`
for 4-D input. Any other number of dimensions raises an error.
"""
function sumAlongDim3(a::Array)
    rank = ndims(a)
    if rank == 3
        r = zeros(1, 1, size(a, 3))
        for i in axes(a, 3)
            r[1, 1, i] = sum(a[:, :, i])
        end
        return r
    elseif rank == 4
        r = zeros(1, 1, size(a, 3), size(a, 4))
        for j in axes(a, 4), i in axes(a, 3)
            r[1, 1, i, j] = sum(a[:, :, i, j])
        end
        return r
    end
    error("this condition is not define yet")
end
"""
    matMul3Dto3DmanyTo1batch(a::Array, b::Array; resultStorage=nothing)

Element-wise multiply each slice `a[:, :, i]` of the 3-D array `a` with the whole 3-D
array `b`, and stack the products along a 4th dimension.

# Arguments
- `a`: 3-D input, batch along the 3rd dimension
- `b`: 3-D array every slice of `a` is multiplied with
- `resultStorage`: optional preallocated output of size
  `(size(b, 1), size(b, 2), size(b, 3), size(a, 3))`; when omitted a new array is
  allocated whose element type is the promotion of the element types of `a` and `b`

# Return
The filled result array (the same object as `resultStorage` when one was given).

# Throws
`DimensionMismatch` when a supplied `resultStorage` has the wrong size.

# Example
    julia> input = rand(32, 32, 128)    # batch at 3rd dim
    julia> weight = rand(32, 32, 1024)
    julia> r = matMul3Dto3DmanyTo1batch(input, weight);
    julia> size(r)
    (32, 32, 1024, 128)
"""
function matMul3Dto3DmanyTo1batch(a::Array, b::Array; resultStorage::Union{Array, Nothing}=nothing)
    expected = (size(b, 1), size(b, 2), size(b, 3), size(a, 3))
    if resultStorage === nothing
        # promote so that e.g. Float64 inputs times Int weights fit in the output
        T = promote_type(eltype(a), eltype(b))
        resultStorage = similar(a, T, expected...)
    elseif size(resultStorage) != expected
        throw(DimensionMismatch(
            "resultStorage has size $(size(resultStorage)), expected $expected"))
    end
    # write each product straight into its slot; no temporary stack, no splatting
    for i in axes(a, 3)
        @views resultStorage[:, :, :, i] .= a[:, :, i] .* b
    end
    return resultStorage
end
""" GPU kernel counterpart of `matMul3Dto3DmanyTo1batch`.

Each thread derives one index `i` from its block and thread position and, when `i` is
within `size(a, 3)`, writes the element-wise product of `a[:, :, i]` and `b` into
`resultStorage[:, :, :, i]`.

Arguments:
a, array indexed as `a[:, :, i]`
b, array multiplied element-wise with each slice of `a`
resultStorage, output array written in place, indexed as `resultStorage[:, :, :, i]`
linearToCartesian, function passed in and called as `linearToCartesian(i, size(b))`

Return:
nothing

NOTE(review): `blockIdx`, `blockDim` and `threadIdx` are not defined or imported in the
visible part of this file; presumably they come from CUDA.jl - confirm.
"""
function matMul3Dto3DmanyTo1batch_gpu!(a, b, resultStorage, linearToCartesian)
# global 1-based thread index built from block and thread position
i = (blockIdx().x - 1) * blockDim().x + threadIdx().x
if i <= size(a, 3) # guard against unused threads to accessing memory out of bound
# the result is only referenced by the commented-out print below
cartesianIndex = linearToCartesian(i, size(b)) # example for how to send "inner" function to gpu
# @cuprintln("gpu thread $i $cartesianIndex[2]")
@. @views resultStorage[:, :, :, i] = a[ :, :, i] * b
# view(resultStorage, :, :, :, i) .= view(a, :, :, i) .* b # alternative code
# @view(resultStorage[:, :, :, i]) .= @view(a[ :, :, i]) .* b # alternative code
end
return nothing
end
"""
    matMul_3Dto4D_batchwise(a::Array, b::Array; resultStorage=nothing)

Batch-wise element-wise product: slice `a[:, :, i]` of the 3-D array `a` is multiplied
with batch `b[:, :, :, i]` of the 4-D array `b`, for every batch `i`.

`a` and `b` must hold the same number of batches (`size(a, 3) == size(b, 4)`). When
`resultStorage` is not given, a Float64 array of zeros with the size of `b` is
allocated.

# Example
    julia> a = rand(2,2,3)      # 3-batches
    julia> b = rand(2,2,4,3)    # 3-batches
    julia> r = GeneralUtils.matMul_3Dto4D_batchwise(a, b);
    julia> size(r)
    (2, 2, 4, 3)
"""
function matMul_3Dto4D_batchwise(a::Array, b::Array; resultStorage::Union{Array, Nothing}=nothing)
    batches = size(a, 3)
    batches == size(b, 4) || error("batch number of a and b must be equal")

    out = resultStorage
    if isnothing(out)
        out = zeros(size(b, 1), size(b, 2), size(b, 3), batches)
    end
    for i in 1:batches
        out[:, :, :, i] .= a[:, :, i] .* b[:, :, :, i]
    end
    return out
end
"""
    linearToCartesian(i::Int, arraySize::NTuple{4,Int})

Convert a 1-based linear index into the matching 1-based cartesian index
`(i1, i2, i3, i4)` of a column-major 4-D array of size `arraySize`. Written with
plain integer arithmetic so it can be used inside GPU code.

Raises an error when `i` is outside `1:prod(arraySize)`.
"""
function linearToCartesian(i::Int, arraySize::NTuple{4,Int})
    # prod() rather than splatting into *: the original notes that splat hurts GPU performance
    if !(1 <= i <= prod(arraySize))
        error("Invalid linear coordinate")
    end
    n1, n2, n3, n4 = arraySize
    offset = i - 1            # zero-based position
    plane = n1 * n2           # number of elements in one 2-D slice
    i1 = offset % n1 + 1
    i2 = (offset ÷ n1) % n2 + 1
    i3 = (offset ÷ plane) % n3 + 1
    i4 = offset ÷ (plane * n3) + 1
    return (i1, i2, i3, i4)
end
"""
    vectorMax(x)

Return a Bool mask that is `true` where `x` equals its maximum and `false` elsewhere.

For an all-zero input the mask is all `false`. Returning a mask in that case too,
rather than the original zeros, keeps the return type the same.
"""
function vectorMax(x)
    nonzero = isNotEqual.(x, 0)
    # no non-zero entry at all: `nonzero` is already the all-false mask to return
    any(nonzero) || return nonzero
    return isequal.(x, maximum(x))
end
"""
    multiply_last(matrix, x, n)

Return a copy of `matrix` in which the last `n` elements, counted in column-major
order, are multiplied by the scalar `x`. The input is not modified.

# Arguments
- `matrix`: column-major 2-D matrix
- `x`: scalar to multiply by
- `n`: number of trailing elements to multiply
"""
function multiply_last(matrix, x, n)
    rows, cols = size(matrix)
    total = rows * cols
    result = copy(matrix)
    # a linear index walks the matrix in column-major order
    for position in (total - n + 1):total
        result[position] *= x
    end
    return result
end
"""
    multiplyRandomElements(A, x, n, rng=MersenneTwister(1234))

Return a copy of `A` in which `n` distinct, randomly chosen elements are multiplied by
the scalar `x`.

# Arguments
- `A`: a vector or a column-major 2-D matrix; anything else raises an error
- `x`: scalar to multiply by
- `n`: number of elements to multiply
- `rng`: random number generator that decides which elements are chosen, see
  https://docs.julialang.org/en/v1/stdlib/Random/
"""
function multiplyRandomElements(A, x, n, rng=MersenneTwister(1234))
    ndims(A) in (1, 2) || error("A must be a vector or a matrix")
    B = copy(A)
    # linear indices cover the vector and the matrix case alike
    order = shuffle!(rng, collect(1:length(A)))
    for k in 1:n
        B[order[k]] *= x
    end
    return B
end
"""
    replaceElements(mask, markValue, A, a, n=0; rng=MersenneTwister(1234))

Return a copy of `A` in which elements are replaced by `a` at randomly chosen
positions where `mask` equals `markValue`. `A` itself is not modified.

# Arguments
- `mask`: array of the same size as `A`
- `markValue`: value in `mask` (compared with `==`) that marks replaceable positions
- `A`: source array
- `a`: replacement value
- `n`: number of positions to replace; `0`, or more than the number of marked
  positions, means all marked positions

# Keyword Arguments
- `rng`: random number generator that controls which marked positions are chosen

# Throws
`ErrorException` when `mask` and `A` differ in size.

# Example
    julia> mask = rand([-1,0,1],4,4,1)
    julia> A = rand(4,4,1)
    julia> C = replaceElements(mask, -1, A, 5.0, 3)
"""
function replaceElements(mask::AbstractArray{<:Any}, markValue::Number, A::AbstractArray{<:Any}, a::Number,
        n::Int=0; rng::AbstractRNG=MersenneTwister(1234))
    if size(mask) != size(A)
        error("mask and A must have the same size")
    end
    # one search gives both the candidates and their count, so the two cannot disagree
    indices = findall(x -> x == markValue, mask)
    if n == 0 || n > length(indices)
        n = length(indices)
    end
    shuffle!(rng, indices)
    C = copy(A)
    for i in view(indices, 1:n)
        C[i] = a
    end
    return C
end
"""
    replaceElements!(mask, markValue, A, a, n=0; rng=MersenneTwister(1234))

In-place variant: replace elements of `A` by `a` at randomly chosen positions where
`mask` equals `markValue`.

# Arguments
- `mask`: array of the same size as `A`
- `markValue`: value in `mask` (compared with `==`) that marks replaceable positions
- `A`: array modified in place
- `a`: replacement value
- `n`: number of positions to replace; `0` means all marked positions

# Keyword Arguments
- `rng`: random number generator that controls which marked positions are chosen

# Return
The number of requested replacements that could not be made because fewer than `n`
positions were marked (`0` when the request was fully served or `n == 0`).

# Throws
`ErrorException` when `mask` and `A` differ in size.

# Example
    julia> mask = rand([-1,0,1],4,4,1)
    julia> A = rand(4,4,1)
    julia> remaining = replaceElements!(mask, -1, A, 5.0, 3)
"""
function replaceElements!(mask::AbstractArray{<:Any}, markValue::Number, A::AbstractArray{<:Any}, a::Number,
        n::Int=0; rng::AbstractRNG=MersenneTwister(1234))
    if size(mask) != size(A)
        error("mask and A must have the same size, mask $(size(mask)) A $(size(A))")
    end
    # one search gives both the candidates and their count, so the two cannot disagree
    indices = findall(x -> x == markValue, mask)
    available = length(indices)
    # only an explicit request larger than the supply leaves something unserved
    remaining = n > available ? n - available : 0
    if n == 0 || n > available
        n = available
    end
    shuffle!(rng, indices)
    for i in view(indices, 1:n)
        A[i] = a
    end
    return remaining
end
"""
    replaceElements(A, x, a, n=0, rng=MersenneTwister(1234))

Return a copy of `A` in which `n` randomly chosen elements equal to `x` are replaced
by `a`. `A` itself is not modified.

# Arguments
- `A`: source array
- `x`: value to be replaced (compared with `==`)
- `a`: the new value
- `n`: number of elements to replace; `0`, or more than the number of matches, means
  all matches
- `rng`: random number generator that controls which matches are chosen
"""
function replaceElements(A::AbstractArray{<:Any}, x::Number, a::Number, n::Int=0, rng=MersenneTwister(1234))
    B = copy(A)
    # one search gives both the candidates and their count, so the two cannot disagree
    indices = findall(==(x), B)
    if n == 0 || n > length(indices)
        n = length(indices)
    end
    shuffle!(rng, indices)
    for i in view(indices, 1:n)
        B[i] = a
    end
    return B
end
"""
    replaceElements!(A, x, a, n=0, rng=MersenneTwister(1234))

In-place variant: replace `n` randomly chosen elements of `A` that equal `x` by `a`.
Returns `nothing`.

# Arguments
- `A`: array modified in place
- `x`: value to be replaced (compared with `==`)
- `a`: the new value
- `n`: number of elements to replace; `0`, or more than the number of matches, means
  all matches
- `rng`: random number generator that controls which matches are chosen
"""
function replaceElements!(A::AbstractArray{<:Any}, x::Number, a::Number, n::Int=0, rng=MersenneTwister(1234))
    # one search gives both the candidates and their count, so the two cannot disagree
    indices = findall(==(x), A)
    if n == 0 || n > length(indices)
        n = length(indices)
    end
    shuffle!(rng, indices)
    for i in view(indices, 1:n)
        A[i] = a
    end
    return nothing
end
"""
    getStringBetweenCharacters(text, startChar, endChar;
                               endCharLocation="next", includeChar=true)

Extract the part of `text` that lies between a start character and an end character.

# Arguments
- `text::T`
    the text being searched
- `startChar::Char`
    start character; its first occurrence in `text` is used
- `endChar::Char`
    end character

# Keyword Arguments
- `endCharLocation::String`
    which occurrence of `endChar` closes the extract. "next" means the closest
    `endChar` after `startChar`; "end" means the last `endChar` in `text`.
- `includeChar::Bool`
    whether the result includes `startChar` and `endChar`. Default is true.

# Return
The extracted text as a `String`.

# Throws
- `ErrorException` when `endCharLocation` is neither "next" nor "end"
- `ArgumentError` when `startChar` or `endChar` cannot be found

# Example
    julia> using GeneralUtils
    julia> GeneralUtils.getStringBetweenCharacters("say {a {b} c} now", '{', '}', endCharLocation="end")
    "{a {b} c}"
    julia> GeneralUtils.getStringBetweenCharacters("say {a {b} c} now", '{', '}')
    "{a {b}"
"""
function getStringBetweenCharacters(text::T, startChar::Char, endChar::Char;
        endCharLocation::String="next", includeChar::Bool=true)::String where {T<:AbstractString}
    endCharLocation in ("next", "end") ||
        error("endCharLocation must be \"end\" or \"next\"")

    startCharPosition = findfirst(startChar, text)
    startCharPosition === nothing &&
        throw(ArgumentError("start character '$startChar' not found in text"))

    # nextind/prevind step by whole characters, so multi-byte characters are safe
    endCharPosition = if endCharLocation == "end"
        findlast(endChar, text)
    else
        findnext(endChar, text, nextind(text, startCharPosition))
    end
    endCharPosition === nothing &&
        throw(ArgumentError("end character '$endChar' not found in text"))

    if includeChar
        return text[startCharPosition:endCharPosition]
    end
    return text[nextind(text, startCharPosition):prevind(text, endCharPosition)]
end
"""
    JSON3read_stringKey(jsonString::AbstractString)

Read a JSON string and return its top-level entries in a dictionary with `String`
keys. (JSON3 defaults to Symbol keys; this function works around that.)

# Arguments
- `jsonString`: the JSON text to parse

# Return
An `OrderedDict{String, Any}`. Only the top-level keys are converted; the values are
stored as returned by `JSON3.read`.

# Example
    julia> jsonString = "{\\"wine type\\": \\"Red\\", \\"intensity level\\": \\"medium-bodied\\"}"
    julia> JSON3read_stringKey(jsonString)
    # "wine type"       => "Red"
    # "intensity level" => "medium-bodied"
"""
function JSON3read_stringKey(jsonString::AbstractString)
    parsed = JSON3.read(jsonString)
    result = OrderedDict{String,Any}()
    for entry in parsed
        key, value = entry
        result[string(key)] = value
    end
    return result
end
"""
    mkDictPath!(dict, addkeys, value)

Create a nested dict path if it does not already exist and store `value` at its end.
The same concept as Julia's `mkpath()`, applied to dicts.

# Arguments
- `dict::Dict`
    target dict, modified in place
- `addkeys::Union{Vector{String}, Vector{Symbol}}`
    keys forming the path; missing intermediate keys get a new empty dict
- `value`
    value stored under the final key of the path

# Return
- `value`

# Throws
`ErrorException` when `dict` is not empty and its keys are of a different type than
the keys being added.

# Example
    julia> using GeneralUtils
    julia> d = Dict{String, Any}("a" => Dict{String, Any}("b" => 10))
    julia> GeneralUtils.mkDictPath!(d, ["a", "v", "x", "y", "z"], 42)
    42
    julia> d["a"]["v"]["x"]["y"]["z"]
    42
"""
function mkDictPath!(dict::Union{Dict{Symbol, Any}, Dict{String, Any}},
        addkeys::Union{Vector{String}, Vector{Symbol}}, value)
    # new key and existing key must be the same type
    if !isempty(dict)
        existingType = typeof(first(keys(dict)))
        addedType = typeof(addkeys[1])
        existingType == addedType ||
            error("Type of keys being added is $(addedType) but type of existing keys is $(existingType)")
    end

    node = dict
    for key in addkeys[1:end-1]
        # descend, creating an empty dict of the same key type where the path is missing
        node = get!(node, key) do
            Dict{keytype(node), Any}()
        end
    end
    node[addkeys[end]] = value
    return value
end
"""
    getDictPath(dict::Dict, keys::Vector)

Get a value from a nested dict by following a vector of keys.

# Arguments
- `dict::Dict`
    target dict
- `keys::Vector`
    keys to follow, outermost first

# Return
- the value stored under the last key

# Throws
`ArgumentError` when a key on the path is not present.

# Example
    julia> using GeneralUtils
    julia> d = Dict{Symbol, Any}(:a => Dict{Symbol, Any}(:b => 10))
    julia> GeneralUtils.getDictPath(d, [:a, :b])
    10
"""
function getDictPath(dict::Dict, keys::Vector)
    last_key = keys[end]
    node = dict
    for key in keys[1:end-1]
        haskey(node, key) || throw(ArgumentError("Key $key not found in dictionary"))
        node = node[key]
    end
    haskey(node, last_key) || throw(ArgumentError("Key $last_key not found in dictionary"))
    return node[last_key]
end
"""
    dataframeToCSV(df::DataFrame)

Convert a dataframe into CSV text.

# Arguments
- `df::DataFrame`
    the dataframe to serialise

# Return
- `result::String`
    the CSV text that `CSV.write` produces for `df`

# Example
    julia> using DataFrames, GeneralUtils
    julia> df = DataFrame(A=1:3, B=5:7, fixed=1)
    julia> result = GeneralUtils.dataframeToCSV(df)
"""
function dataframeToCSV(df::DataFrame)
    # sprint supplies an in-memory buffer and returns what was written to it
    return sprint() do io
        CSV.write(io, df)
    end
end
end # module