1399 lines
41 KiB
Julia
1399 lines
41 KiB
Julia
module interface
|
||
|
||
|
||
export noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue, replaceMoreThan,
|
||
replaceLessThan, replaceBetween, cartesianAssign!, sumAlongDim3, matMul3Dto3DmanyTo1batch,
|
||
matMul_3Dto4D_batchwise, isNotEqual, linearToCartesian, vectorMax, findMax,
|
||
multiply_last, multiplyRandomElements, replaceElements, replaceElements!, isBetween,
|
||
isLess, allTrue, getStringBetweenCharacters, JSON3read_stringKey, mkDictPath!,
|
||
getDictPath, detectKeywordVariation, textToDict
|
||
|
||
using JSON, DataStructures, Distributions, Random, Dates, UUIDs, DataFrames, CSV
|
||
using ..util, ..communication
|
||
#[WORKING] update code to use JSON
|
||
# ---------------------------------------------- 100 --------------------------------------------- #
|
||
|
||
""" Replace every negative entry of `a` with 0 in place and return `a`.
"""
function noNegative!(a::AbstractVector)
    return map!(v -> v < 0 ? 0 : v, a, a)
end

""" Return the indices of the non-zero entries of `x`.
"""
function findNotZero(x::AbstractVector)
    return findall(!iszero, x)
end

""" Return `replaceValue` when `x > target`, otherwise `x`.
"""
function replaceMoreThan(x, target, replaceValue)
    if x > target
        return replaceValue
    end
    return x
end

""" Return `a` when `x > target`, otherwise `b`.
"""
function replaceMoreThan(x, target, a, b)
    if x > target
        return a
    end
    return b
end

""" Return `replaceValue` when `x < target`, otherwise `x`.
"""
function replaceLessThan(x, target, replaceValue)
    if x < target
        return replaceValue
    end
    return x
end

""" Return `a` when `x < target`, otherwise `b`.
"""
function replaceLessThan(x, target, a, b)
    if x < target
        return a
    end
    return b
end

""" Return `replaceValue` when `x` lies strictly between the two bounds, otherwise `x`.
"""
function replaceBetween(x, lowerbound, upperbound, replaceValue)
    if lowerbound < x < upperbound
        return replaceValue
    end
    return x
end
|
||
""" Coefficient of variation (in percent) of the per-group means of `x`.
"""
function precision(x::Array{<:Array})
    groupMeans = mean.(x)
    return (std(groupMeans) / mean(groupMeans)) * 100
end

""" Coefficient of variation (in percent) of `x`: standard deviation over mean, times 100.
"""
function precision(x::Array)
    return std(x) / mean(x) * 100
end
|
||
""" Store `value` at position `ind` of `x`; returns `value`.
"""
function replaceAt!(x::AbstractVector, ind::Number, value::Number)
    x[ind] = value
    return value
end

""" Element-wise mask that is `true` where `x` is non-zero.
"""
function notZero(x::AbstractVector)
    return .!iszero.(x)
end

""" Element-wise mask that is `true` where `x` is zero.
"""
function Zero(x::AbstractVector)
    return broadcast(iszero, x)
end

""" Element-wise mask that is `true` where `x` is NaN.
"""
function isNan(x::AbstractVector)
    return broadcast(isnan, x)
end
|
||
""" Return `true` when `x` is positive or negative infinity.

The previous `abs(x) === Inf` test compared type as well as value, so it
reported `false` for `Inf32` and `Inf16`. `isinf` handles every float width.
"""
isInf(x::Number) = isinf(x)

""" Element-wise infinity mask of `x`.
"""
isInf(x::AbstractVector) = isinf.(x)
|
||
""" Return `true` when `x` and `target` are not `isequal`.
"""
function isNotEqual(x::Number, target::Number)
    return !isequal(x, target)
end

""" Return `true` when `x` lies strictly between `lowerlimit` and `upperlimit`.
"""
function isBetween(x, lowerlimit, upperlimit)
    if lowerlimit < x < upperlimit
        return true
    else
        return false
    end
end

""" Element-wise absolute value of `x`.
"""
function absolute(x::AbstractVector)
    return broadcast(abs, x)
end

""" Element-wise product of two vectors.
"""
function vecEleMul(x::AbstractVector, y::AbstractVector)
    return broadcast(*, x, y)
end

""" Scale every element of `y` by the scalar `x`.
"""
function vecEleMul(x::Number, y::AbstractVector)
    return broadcast(*, x, y)
end
|
||
""" Exponentially decay `initialValue` by `decayFactor` per time step over `timePass` steps.
"""
function expDecay(initialValue::Number, decayFactor::Number, timePass::Number)
    retained = 1 - decayFactor
    return initialValue * retained^timePass
end

""" In-place element-wise product: `x` is overwritten with `x .* y` and returned.
"""
function mul!(x::AbstractVector, y::AbstractVector)
    return broadcast!(*, x, x, y)
end

""" Element-wise product of two vectors, returned as a new vector.
"""
function mul(x::AbstractVector, y::AbstractVector)
    return broadcast(*, x, y)
end

""" Return `true` unless one of the arguments equals `false`.
"""
function allTrue(args...)
    if false ∈ [args...]
        return false
    end
    return true
end
|
||
|
||
""" Rectified linear unit: the larger of 0 and `x`.
"""
function ReLu(x::Number)
    return max(0, x)
end

""" Overwrite every element of `x` with the scalar `target`; returns `x`.
"""
function updateVector!(x::AbstractVector, target::Number)
    x .= target
    return x
end

""" Overwrite `x` element-wise with the contents of `target`; returns `x`.
"""
function updateVector!(x::AbstractVector, target::AbstractArray)
    x .= target
    return x
end

""" Add `value` to `x` in place, weighted element-wise by `ind`; returns `x`.
"""
function selectAdd!(x::AbstractVector, ind::AbstractVector, value::AbstractVector)
    x .= x .+ (ind .* value)
    return x
end
|
||
|
||
|
||
|
||
"""
    findIndex(text::String, target::Char)

Locate every occurrence of `target` in `text`.

# Arguments
- `text`: input text
- `target`: character to look for

# Returns
`(match_position, match_index)`: a Bool mask with one entry per character of
`text`, and the character positions where the mask is true.

# Example
```jldoctest
julia> text = "Hello World!"

julia> findIndex(text, 'l')
(Bool[0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0], [3, 4, 10])
```
"""
function findIndex(text::String, target::Char)
    match_position = isequal.(collect(text), target)
    return match_position, findall(match_position)
end
|
||
|
||
""" Locate every element of `input` that is `isequal` to the number `target`.
Returns `(mask, indices)`.
"""
function findIndex(input::Array, target::Number)
    hits = broadcast(isequal, input, target)
    return hits, findall(hits)
end
|
||
|
||
# function findIndex(input::Array, target::Array)
|
||
# match_position = isone.(zeros(length(input)))
|
||
# for i in target
|
||
# match_position = match_position + isequal.(input, i)
|
||
# end
|
||
|
||
# match_position = replaceMoreThan.(match_position, 1)
|
||
# match_index = findall(isone.(match_position)) # Findall donot work with Int64 vector [1, 0, 0, 1].
|
||
# # It only works with BitVector. isone() converts Int64 vector [1, 0, 0, 1] into
|
||
# # BitVector [1, 0, 0, 1]
|
||
|
||
# return match_position, match_index
|
||
# end
|
||
|
||
""" Locate every element of `input` that is `isequal` to the symbol `target`.
Returns `(mask, indices)`.
"""
function findIndex(input::Array, target::Symbol)
    hits = broadcast(isequal, input, target)
    return hits, findall(hits)
end
|
||
|
||
""" Locate every string in `collection` that is `isequal` to `target`.
Returns `(mask, indices)`.
"""
function findIndex(collection::Array{String}, target::String)
    hits = broadcast(isequal, collection, target)
    return hits, findall(hits)
end
|
||
|
||
""" Locate each string of `target` inside `collection`.

Returns `(match_position, match_index)`. For one target these are the plain
mask and index vector; for several targets the per-target results are joined
column-wise with `hcat`. Both are `nothing` when `target` is empty.

NOTE(review): `hcat` of the index vectors throws when the targets have
different numbers of matches. Confirm whether callers rely on equal counts.
"""
function findIndex(collection::Array{String}, target::Array{String})
    match_position = nothing
    match_index = nothing

    for wanted in target
        hits = isequal.(collection, wanted)
        hitIndex = findall(hits)

        match_position = isnothing(match_position) ? hits : hcat(match_position, hits)
        match_index = isnothing(match_index) ? hitIndex : hcat(match_index, hitIndex)
    end

    return match_position, match_index
end
|
||
|
||
""" Locate the key `target` among the keys of `collection`, in key order.
Returns `(mask, indices)` over the key sequence.
"""
function findIndex(collection::OrderedDict, target::Symbol)
    keyList = collect(keys(collection))
    hits = isequal.(keyList, target)
    return hits, findall(hits)
end
|
||
|
||
""" Return `(maxValue, maxIndex, matchPosition)` for `collection`, where
`matchPosition` marks every element equal to the maximum.
"""
function findMax(collection::AbstractVector)
    peak, peakIndex = findmax(collection)
    peakMask = broadcast(isequal, collection, peak)
    return peak, peakIndex, peakMask
end
|
||
|
||
|
||
|
||
"""
    read_textfile_by_index(folder_path::String, read_file_number::Integer=1)

Read the `read_file_number`-th file of `folder_path` as a vector of lines.
Files are taken in the order reported by `readdir(...; sort=false)`.

# Returns
`(read_file_number, filename, content)`, where `filename` is the joined path
and `content` is the result of `readlines`.

# Throws
`ErrorException` when `folder_path` is not a directory or when
`read_file_number` is outside `1:number_of_files`.

# Example
    read_textfile_by_index(cleaned_data_path, 2)   # read the 2nd file in the folder
"""
function read_textfile_by_index(folder_path::String, read_file_number::Integer=1)
    isdir(folder_path) || error("ERROR no file or folder at $folder_path")

    # list the directory once; the same listing gives both the count and the path
    filepaths = readdir(folder_path, join=true, sort=false)
    filenumber = length(filepaths)

    if read_file_number < 1 || read_file_number > filenumber
        error("you specified read_file_number = $read_file_number which is out\n" *
              "of range, the cleaned data folder has only $filenumber files")
    end

    filename = filepaths[read_file_number]
    # readlines(path) opens and closes the file itself, even if reading throws
    content = readlines(filename)

    return read_file_number, filename, content
end
|
||
|
||
# ---------------------------------------------- 100 --------------------------------------------- #
|
||
|
||
"""
    Array_to_JSON3_str(data::AbstractArray)

Encode an array and its size as a JSON string with keys "Array" and "size".

NOTE(review): `JSON3` is not among the packages loaded at the top of the visible
part of this file. Confirm it is brought into scope elsewhere.

# Example
    a = [1.23 4.7889; 9987.1 -123.07; -0.0027 -6.75]
    json3_str = Array_to_JSON3_str(a)
    json3_str = {"Array":[1.23,9987.1,-0.0027,4.7889,-123.07,-6.75],"size":[3,2]}
"""
function Array_to_JSON3_str(data::AbstractArray)
    payload = Dict("Array" => data, "size" => size(data))
    return JSON3.write(payload)
end
|
||
|
||
# ---------------------------------------------- 100 --------------------------------------------- #
|
||
|
||
"""
    JSON3_str_to_Array(json3_str::String)

Decode a JSON string with keys "Array" (flat element list) and "size" back into
an array of that size. Any number of dimensions listed in "size" is supported;
the previous version read only the first two entries and failed on 1-D input.

NOTE(review): `JSON3` is not among the packages loaded at the top of the visible
part of this file. Confirm it is brought into scope elsewhere.

# Example
    json3_str = {"Array":[1.23,9987.1,-0.0027,4.7889,-123.07,-6.75],"size":[3,2]}
    a = JSON3_str_to_Array(json3_str)
    a = [1.23 4.7889; 9987.1 -123.07; -0.0027 -6.75]
"""
function JSON3_str_to_Array(json3_str::String)
    d = JSON3.read(json3_str)
    # use every entry of "size", not just the first two
    dims = Tuple(Int(n) for n in d.size)
    return reshape(Array(d.Array), dims)
end
|
||
|
||
# ---------------------------------------------- 100 --------------------------------------------- #
|
||
|
||
""" Convert a `JSON3.read` object to an `OrderedDict` with string keys.

Keys are converted with `string`; values are stored unchanged.

# Example
    dict = dictionary(["a"=>4, "b"=>6])
    OrDict = OrderedDict(dict)
    jsonString = JSON3.write(OrDict)     # exchange format: save to file or send via pub/sub
    jsonObject = JSON3.read(jsonString)
    OrDict2 = JSON3read_to_OrDict(jsonObject)
    Adict2 = dictionary(OrDict2)

`dictionary` comes from Dictionaries.jl: https://github.com/andyferris/Dictionaries.jl
"""
function JSON3read_to_OrDict(x)
    return OrderedDict{Any,Any}(string(key) => value for (key, value) in x)
end
|
||
|
||
#------------------------------------------------------------------------------------------------100
|
||
|
||
"""
    @timeline expr

Print the source line number where the macro is used, then run `expr` under
`@time` so its execution time is reported next to that line number.
"""
macro timeline(expr)
    lineNumber = __source__.line
    return quote
        print("line ", $lineNumber, ": ")
        @time $(esc(expr))
    end
end
|
||
|
||
|
||
""" Index range covered by batch number `batch_counter` (1-based) when each batch
holds `batch_size` items, shifted by `offset`.
"""
function batchindex(batch_counter::Number, batch_size::Number; offset=0)
    firstIndex = offset + (batch_counter - 1) * batch_size + 1
    lastIndex = offset + (batch_counter * batch_size)
    return firstIndex:lastIndex
end
|
||
|
||
""" Return the logical negation of `x`.

A `Bool` is always `true` or `false`, so the former "undefined condition"
branch could never run and has been removed.
"""
function flip_true_false(x::Bool)
    return !x
end
|
||
|
||
""" Flip an integer flag: 1 becomes 0 and 0 becomes 1.
Any other value throws a `String` describing the problem.
"""
function flip_true_false(x::Int)
    x == 1 && return 0
    x == 0 && return 1
    throw("not define input of type $(typeof(x)) yet")
end
|
||
|
||
"""
    randomWithProb(probability::AbstractVector)

Draw an index from the categorical distribution described by `probability`
and return it.

# Example
    drawed_index = randomWithProb([0.5, 0.2, 0.3])
"""
function randomWithProb(probability::AbstractVector)
    categorical = Distributions.Categorical(probability)
    return rand(categorical)
end
|
||
|
||
"""
    randomChoiceWithProb(choices::Array, probability::Array)

Draw one element of `choices` according to `probability`.

Each probability is in 0.0 to 1.0 and together they must sum to 1. The sum is
checked with `isapprox`, so floating-point round-off (for example the output of
a softmax) no longer causes a valid vector to be rejected.

# Throws
`ErrorException` when the two arrays differ in length or the probabilities do
not sum to approximately 1.

# Example
    draw = randomChoiceWithProb([true, false, nothing], [0.5, 0.2, 0.3])
"""
function randomChoiceWithProb(choices::Array, probability::Array)
    if length(choices) != length(probability)
        error("random is not possible, choices array length != probability array length")
    elseif !isapprox(sum(probability), 1.0)
        # exact `!= 1.0` rejected sums such as 1.0000000000000002
        error("probability does not sum to 1.0")
    end

    return choices[randomWithProb(probability)]
end
|
||
|
||
""" Return a random draw from `choices` (weighted by `probability`) when `target`
equals `targetMatch`; otherwise return `target` unchanged.
Throws a `String` when `choices` and `probability` differ in length.
"""
function randomChoiceOnTarget(target::Number, targetMatch::Number, choices::AbstractVector,
                              probability::AbstractVector)
    length(choices) == length(probability) ||
        throw("random is not possible, choices array length != probability array length")

    if target == targetMatch
        return randomChoiceWithProb(choices, probability)
    else
        return target
    end
end

""" Apply the scalar method to every element of `target`, redrawing the elements
that equal 1.
"""
function randomChoiceOnTarget(target::AbstractVector, choiceList::AbstractVector,
                              probability::AbstractVector)
    return [randomChoiceOnTarget(t, 1, choiceList, probability) for t in target]
end
|
||
|
||
""" Position-weighted average of `a`: the sum of `i * a[i]` divided by `sum(a)`.
"""
function linearly_weighted_avg(a::Array)
    weighted = foldl((acc, (i, v)) -> acc + (i * v), enumerate(a); init=0.0)
    return weighted / sum(a)
end
|
||
|
||
|
||
""" Convert the string held in a variable to a `Symbol`.

# Example
    x = "hello"                       # x holds the String "hello"
    y = variable_str_to_symbol(x)     # y holds :hello
"""
function variable_str_to_symbol(variable)
    return Symbol(variable)
end
|
||
|
||
|
||
""" List the types a field of a mutable struct ends up holding when probed.

A fresh instance is created with the zero-argument constructor `somestruct()`.
Every value in `test_types` is assigned to `fieldname`; assignments that throw
are skipped, and the type of the stored value is recorded once.

# Example
    julia> @Base.kwdef mutable struct some_struct
               a::Union{Bool, Nothing} = nothing
               b::Union{Float64, Nothing} = nothing
               c::Union{Int64, AbstractFloat} = 3.5
               d::Union{String, Nothing} = nothing
           end

    julia> fieldname_useable_type(some_struct, :c)   # => [Float64, Int64]
"""
function fieldname_useable_type(somestruct, fieldname::Symbol;
        test_types=[2.0, 2, true, :h, "str", 'c', missing, nothing])::Vector{DataType}
    probe = somestruct()
    accepted = []

    for candidate in test_types
        try
            setproperty!(probe, fieldname, candidate)
            stored = typeof(getproperty(probe, fieldname))
            stored ∉ accepted && push!(accepted, stored)
        catch
            # the field rejected this value: not a usable type
        end
    end

    return accepted
end
|
||
|
||
|
||
""" Draw `draw_number` distinct items from `drawOptions` in random order, skipping
anything listed in `exclude_list`. The input array is not modified.
"""
function randomNoRepeat(drawOptions::Array, draw_number::Integer;
        exclude_list::Union{AbstractArray,Nothing}=nothing)
    pool = copy(drawOptions)
    if exclude_list !== nothing
        filter!(item -> item ∉ exclude_list, pool)
    end
    shuffle!(pool)

    picked = []
    for _ in 1:draw_number
        push!(picked, pop!(pool))
    end
    return picked
end
|
||
|
||
"""
    folderBackup(sourceFolderAbsolutePath, backupFolderAbsolutePath;
                 totalBackupFiles=7, containerName=nothing)

Zip `sourceFolderAbsolutePath` into a timestamped archive inside
`backupFolderAbsolutePath`, then delete the first entries of the backup folder
(in `readdir` order) until at most `totalBackupFiles` remain.

When `containerName` is given, each named docker container is stopped before
the backup and started again afterwards. Failures of `docker stop/start` are
logged and otherwise ignored.

Relies on the external commands `chmod`, `zip`, and optionally `docker`.

# Scheduling with cron
1. `sudo nano /etc/crontab`  (the system-wide cron file)
2. To run a Julia script at 2:00am every day, add this line at the bottom of the file:
   `0 2 * * * root julia-1.7 /home/syncthing_backup_script.jl`

Requires `using Dates`.
"""
function folderBackup(sourceFolderAbsolutePath::String,   # folder to be backed up
                      backupFolderAbsolutePath::String;   # folder that stores the archives
                      totalBackupFiles::Integer=7,        # archives to keep
                      containerName::Union{Array{String}, Nothing}=nothing)

    # a String on every platform, so the suffix tests below are type-consistent
    sep = Sys.iswindows() ? "\\" : "/"

    if endswith(sourceFolderAbsolutePath, sep)
        sourceFolderAbsolutePath = String(chop(sourceFolderAbsolutePath))
    end
    # this test used to reference an undefined variable `sl`
    if !endswith(backupFolderAbsolutePath, sep)
        backupFolderAbsolutePath = backupFolderAbsolutePath * sep
    end

    mkpath(backupFolderAbsolutePath)   # no-op when the folder already exists

    # stop running docker container services
    if containerName !== nothing
        println("stop running services")
        for i in containerName
            try
                run(`docker stop $i`)
            catch err
                @warn "docker stop failed" container = i exception = err
            end
            sleep(10)   # wait for the service to stop
        end
    end

    # do backup
    println("doing backup now")
    timestamp = string(Dates.now())
    name = split(sourceFolderAbsolutePath, sep)[end] * "--"
    filename = name * timestamp * ".zip"   # resulting compressed filename
    run(`chmod -R a+rwx $sourceFolderAbsolutePath`)
    # zip -r [destination+filename] [source folder to be zipped]
    run(`zip -r $(backupFolderAbsolutePath * filename) $sourceFolderAbsolutePath`)

    # delete the first listed backup while there are more than the user asked to keep
    backupFiles = readdir(backupFolderAbsolutePath)
    while length(backupFiles) > totalBackupFiles
        rm(backupFolderAbsolutePath * backupFiles[1])
        backupFiles = readdir(backupFolderAbsolutePath)
    end

    # start docker services
    if containerName !== nothing
        println("start services")
        for i in containerName
            try
                run(`docker start $i`)
            catch err
                @warn "docker start failed" container = i exception = err
            end
            sleep(10)   # wait for the service to start
        end
    end
end
|
||
|
||
""" Raise every element of `data` that is below `lowerbound` up to `lowerbound`,
in place; returns `data`.
"""
function lowerclip!(data::AbstractVector, lowerbound::Number)
    return map!(v -> v < lowerbound ? lowerbound : v, data, data)
end
|
||
|
||
""" Lower every element of `data` that is above `upperbound` down to `upperbound`,
in place; returns `data`.
"""
function upperclip!(data::AbstractVector, upperbound::Number)
    return map!(v -> v > upperbound ? upperbound : v, data, data)
end
|
||
|
||
""" Standardise `x` with a supplied mean `mu` and standard deviation `std`:
`(x - mu) / (std + 1e-5)`, where the small constant is converted to the type of
`x[1]`.
"""
function normalise(x::AbstractArray, mu, std)
    stabiliser = oftype(x[1], 1e-5)
    centred = x .- mu
    return centred ./ (std .+ stabiliser)
end
|
||
|
||
""" Rescale `x` linearly so its minimum maps to 0 and its maximum maps to 1.
"""
function minMaxScaler(x::AbstractVector)
    lo = minimum(x)
    hi = maximum(x)

    rescale(v::Number) = (v - lo) / (hi - lo)
    return rescale.(x)
end
|
||
|
||
"""
    customclamp(x, poslo, poshi, neglo, neghi)

Clamp the magnitude of `x` while keeping its sign: positive values are limited
to `[poslo, poshi]`, negative values to `[neglo, neghi]`. Zero and NaN are
returned unchanged (they previously fell through and produced `nothing`).

# Example
    a = [-1e200, -1e-200, 1e200, 1e-200]
    result = customclamp.(a, 1e-6, 1e6, -1e6, -1e-6)
    # result == [-1e6, -1e-6, 1e6, 1e-6]
"""
function customclamp(x::Number, poslo::Number, poshi::Number,
                     neglo::Number, neghi::Number)
    if x < 0
        x > neghi && return neghi   # too close to zero on the negative side
        x < neglo && return neglo   # too large in magnitude
    elseif x > 0
        x > poshi && return poshi   # too large
        x < poslo && return poslo   # too close to zero on the positive side
    end
    # zero, NaN, or already inside an allowed band
    return x
end
|
||
|
||
""" Scale `x` to unit Euclidean length.
"""
function unitVec(x::AbstractVector)
    squares = x .^ 2
    magnitude = sqrt(sum(squares))
    return x ./ magnitude
end
|
||
|
||
""" Write `value` into `x` at every position listed in `ind`. Returns `nothing`.
"""
function replaceAt!(x::AbstractVector, ind::AbstractVector, value::Number)
    foreach(position -> setindex!(x, value, position), ind)
    return nothing
end
|
||
|
||
""" Map each element of `x` to 1 when its sign bit is set (negative) and to -1
otherwise.
"""
function signbitVec(x::AbstractVector)
    return map(v -> signbit(v) ? 1 : -1, x)
end
|
||
|
||
""" Remove every element of `x` in place. Returns `nothing`.

Uses `empty!` instead of deleting the first element repeatedly, which shifted
the remaining elements on every step.
"""
function deleteall!(x::AbstractVector)
    empty!(x)
    return nothing
end
|
||
|
||
""" Take the same index range from every vector stored in a dict and return the
slices in a new `Dict{Symbol, AbstractVector}`.

# Example
    dict = Dict(:a => [1:5...],
                :b => [6:10...])

    selectRange(dict, 1:3)
    # Dict{Symbol, AbstractVector} with 2 entries:
    #   :a => [1, 2, 3]
    #   :b => [6, 7, 8]
"""
function selectRange(d::Dict{Symbol, <:AbstractVector}, range)
    return Dict{Symbol, AbstractVector}(key => column[range] for (key, column) in d)
end
|
||
|
||
""" Assign a value inside a nested dict, addressed by an array of keys.

Every key except the last must already exist and lead to another dict; the last
key is then set to `valueToAssign` (and created if absent).

# Returns
`0` on success; `1` when `accessArray` is empty or an intermediate key is
missing or does not hold a dict.

# Example
    d = Dict(
        :a1 => Dict(:c => 5),
        :a2 => Dict(
            :k => 10,
            :b => Dict(
                :s => "target",
            )
        )
    )
    assignDict!(d, [:a2, :b, :s], "wow")   # d[:a2][:b][:s] == "wow"
"""
function assignDict!(dict::Dict, accessArray::Array{Symbol}, valueToAssign)
    isempty(accessArray) && return 1

    current = dict
    # Position decides which key is last, so a path such as [:a, :a] works and a
    # single-key path assigns directly into `dict`.
    for key in accessArray[1:end-1]
        if current isa AbstractDict && haskey(current, key)
            current = current[key]
        else
            return 1   # no target key in the given dict
        end
    end

    current isa AbstractDict || return 1
    current[accessArray[end]] = valueToAssign
    return 0
end
|
||
|
||
""" Convert an hour (0-23) and minute (0-59) into a `Dates.Time`.

Throws `ErrorException("hour out of range")` for hours outside 0-23; minutes
outside 0-59 are rejected by the `Time` constructor.

# Example
    iTime(13, 5)    # 13:05:00
    iTime(0, 30)    # 00:30:00
"""
function iTime(h::Integer, m::Integer)
    0 <= h <= 23 || error("hour out of range")
    # build the Time directly rather than formatting a 12-hour string and re-parsing it
    return Time(h, m)
end
|
||
|
||
""" Replace a number according to limits given as `limit => replacement` pairs.

If `v` is below the lower limit, the lower replacement is returned; if `v` is
above the upper limit, the upper replacement is returned; otherwise `v`.

# Example
    limitvalue(4, (-5 => 0), (5 => 5))    # 4
"""
function limitvalue(v::Number, lowerbound::Pair, upperbound::Pair)
    v < first(lowerbound) && return last(lowerbound)
    v > first(upperbound) && return last(upperbound)
    return v
end
|
||
|
||
|
||
""" Copy matrix `b` into matrix `a` at the same cartesian positions.

# Arguments
- `a`: target array, modified in place
- `b`: source array

# Returns
`nothing`; the result is left in `a`.

# Example
    a = zeros(3, 3)
    cartesianAssign!(a, [1 2; 3 4])
    # a == [1.0 2.0 0.0; 3.0 4.0 0.0; 0.0 0.0 0.0]
"""
function cartesianAssign!(a, b)
    # iterate the index set once rather than rebuilding it for every element
    for I in CartesianIndices(b)
        a[I] = b[I]
    end
    return nothing
end
|
||
|
||
|
||
""" Sum each 2-D slice of a 3-D or 4-D array.

Returns a `Float64` array of size `(1, 1, d3)` for 3-D input or
`(1, 1, d3, d4)` for 4-D input. Any other number of dimensions raises an error.
"""
function sumAlongDim3(a::Array)
    rank = ndims(a)

    if rank == 3
        depth = size(a, 3)
        r = zeros(1, 1, depth)
        for k in 1:depth
            r[1, 1, k] = sum(a[:, :, k])
        end
        return r
    elseif rank == 4
        depth, batches = size(a, 3), size(a, 4)
        r = zeros(1, 1, depth, batches)
        for j in 1:batches, k in 1:depth
            r[1, 1, k, j] = sum(a[:, :, k, j])
        end
        return r
    end

    error("this condition is not define yet")
end
|
||
|
||
|
||
""" Element-wise multiply each slice of the 3-D array `a` with every slice of the
3-D array `b`, stacking the results along a 4th dimension.

`result[:, :, k, i] == a[:, :, i] .* b[:, :, k]`

# Keyword Arguments
- `resultStorage`: optional preallocated 4-D array to write into. When omitted,
  a buffer is allocated with the promoted element type of `a` and `b`
  (previously `eltype(b)` alone, which failed for Float `a` with Int `b`).

# Example
    julia> input = rand(32, 32, 128)     # batch at 3rd dim
    julia> weight = rand(32, 32, 1024)
    julia> r = matMul3Dto3DmanyTo1batch(input, weight);
    julia> size(r)
    (32, 32, 1024, 128)
"""
function matMul3Dto3DmanyTo1batch(a::Array, b::Array; resultStorage::Union{Array, Nothing}=nothing)
    if resultStorage === nothing
        T = promote_type(eltype(a), eltype(b))
        resultStorage = similar(a, T, size(b, 1), size(b, 2), size(b, 3), size(a, 3))
    end

    # write each batch straight into its slot; no splatted `cat` temporary
    for i in axes(a, 3)
        @views resultStorage[:, :, :, i] .= a[:, :, i] .* b
    end

    return resultStorage
end
|
||
|
||
|
||
""" GPU kernel: thread `i` writes `a[:, :, i] .* b` into `resultStorage[:, :, :, i]`.

`linearToCartesian` is passed in as an argument to show how an "inner" function
can be sent to the GPU; its result is not used here.

NOTE(review): `blockIdx`, `blockDim` and `threadIdx` are not defined in the
visible part of this file. Presumably they come from a CUDA package; confirm.
"""
function matMul3Dto3DmanyTo1batch_gpu!(a, b, resultStorage, linearToCartesian)
    i = threadIdx().x + (blockIdx().x - 1) * blockDim().x

    # surplus threads must not touch memory out of bounds
    if i > size(a, 3)
        return nothing
    end

    cartesianIndex = linearToCartesian(i, size(b))
    @views resultStorage[:, :, :, i] .= a[:, :, i] .* b

    return nothing
end
|
||
|
||
|
||
""" Element-wise multiply batch `i` of the 3-D array `a` with batch `i` of the 4-D
array `b`, for every batch.

`result[:, :, k, i] == a[:, :, i] .* b[:, :, k, i]`

The batch counts `size(a, 3)` and `size(b, 4)` must match. When `resultStorage`
is not given, a `Float64` array of zeros is allocated.

# Example
    julia> a = rand(2, 2, 3)       # 3 batches
    julia> b = rand(2, 2, 4, 3)    # 3 batches
    julia> r = matMul_3Dto4D_batchwise(a, b);
    julia> size(r)
    (2, 2, 4, 3)
"""
function matMul_3Dto4D_batchwise(a::Array, b::Array; resultStorage::Union{Array, Nothing}=nothing)
    batches = size(a, 3)
    batches == size(b, 4) || error("batch number of a and b must be equal")

    if isnothing(resultStorage)
        resultStorage = zeros(size(b, 1), size(b, 2), size(b, 3), batches)
    end

    for batch in 1:batches
        @views resultStorage[:, :, :, batch] .= a[:, :, batch] .* b[:, :, :, batch]
    end
    return resultStorage
end
|
||
|
||
""" Convert a 1-based linear index into a 4-D cartesian index for an array of size
`arraySize` (column-major). Written without splatting so it can run on the GPU.

Raises an error when `i` is outside `1:prod(arraySize)`.
"""
function linearToCartesian(i::Int, arraySize::NTuple{4,Int})
    # prod() instead of *(arraySize...) because splatting hurts GPU performance
    if !(1 <= i <= prod(arraySize))
        error("Invalid linear coordinate")
    end

    n1, n2, n3, n4 = arraySize

    # peel off one dimension at a time from the 0-based offset
    rest, r1 = divrem(i - 1, n1)
    rest, r2 = divrem(rest, n2)
    r4, r3 = divrem(rest, n3)

    # +1 converts each 0-based remainder to a 1-based index
    return (r1 + 1, r2 + 1, r3 + 1, r4 + 1)
end
|
||
|
||
""" Return a Bool vector that is true at the maximum value(s) of `x` and false
elsewhere. When no element differs from 0, an all-false vector is returned.
"""
function vectorMax(x)
    nonZeroMask = isNotEqual.(x, 0)

    # Guard against an all-zeros input: return the all-false mask itself, not the
    # original vector, so the result is always a Bool mask.
    if !any(nonZeroMask)
        return nonZeroMask
    end

    return isequal.(x, maximum(x))
end
|
||
|
||
""" Multiply the last `n` elements (column-major order) of a 2-D matrix by the
scalar `x` and return the result as a new matrix; the input is not modified.
"""
function multiply_last(matrix, x, n)
    rows, cols = size(matrix)
    total = rows * cols

    # work on a copy so the caller's matrix stays untouched
    result = copy(matrix)

    for k in (total - n + 1):total
        # split the 0-based linear offset into column and row offsets
        colOffset, rowOffset = divrem(k - 1, rows)
        result[rowOffset + 1, colOffset + 1] *= x
    end

    return result
end
|
||
|
||
""" Multiply `n` randomly chosen distinct elements of a vector or matrix `A` by the
scalar `x` and return a new array; `A` is not modified.

`rng` is a random number generator, see
https://docs.julialang.org/en/v1/stdlib/Random/. The default is a fresh
`MersenneTwister(1234)`, so calls without an explicit `rng` pick the same
positions every time.
"""
function multiplyRandomElements(A, x, n, rng=MersenneTwister(1234))
    B = copy(A)
    ndims(A) in (1, 2) || error("A must be a vector or a matrix")

    # shuffled linear indices; the first n entries are the chosen positions
    order = shuffle!(rng, collect(1:length(A)))
    for k in 1:n
        B[order[k]] *= x
    end

    return B
end
|
||
|
||
""" Randomly (controlled by `rng`) choose positions where `mask == markValue` and
return a copy of `A` with the value `a` written at those positions.

# Arguments
- `mask`: array of the same size as `A`
- `markValue`: value in `mask` that marks candidate positions
- `A`: source array (not modified)
- `a`: replacement value
- `n`: number of positions to replace; `0`, or more than are available, means
  all marked positions

# Keyword Arguments
- `rng`: random number generator; the default is a fresh `MersenneTwister(1234)`

Candidates are found and counted with the same `==` test. The previous version
counted with `isequal`, so the count could disagree with the positions found.

# Example
    julia> mask = rand([-1,0,1],4,4,1)
    julia> A = rand(4,4,1)
    julia> C = replaceElements(mask, -1, A, 5.0, 3)
"""
function replaceElements(mask::AbstractArray{<:Any}, markValue::Number, A::AbstractArray{<:Any},
                         a::Number, n::Int=0; rng::AbstractRNG=MersenneTwister(1234))
    # check sizes before doing any work
    if size(mask) != size(A)
        error("mask and A must have the same size")
    end

    # positions marked in mask; their count is the number available
    indices = findall(x -> x == markValue, mask)
    available = length(indices)
    if n == 0 || n > available
        n = available
    end

    shuffle!(rng, indices)

    C = copy(A)
    for i in view(indices, 1:n)
        C[i] = a
    end
    return C
end
|
||
|
||
""" Randomly (controlled by `rng`) choose positions where `mask == markValue` and
write the value `a` into `A` at those positions, in place.

`n` is the number of positions to replace; `0`, or more than are available,
means all marked positions.

# Returns
The number of requested replacements that could not be made:
`n - available` when `n` exceeds the number of marked positions, otherwise `0`.
With `n == 0` the previous version returned a negative number.

# Example
    julia> mask = rand([-1,0,1],4,4,1)
    julia> A = rand(4,4,1)
    julia> remaining = replaceElements!(mask, -1, A, 5.0, 3)
"""
function replaceElements!(mask::AbstractArray{<:Any}, markValue::Number, A::AbstractArray{<:Any},
                          a::Number, n::Int=0; rng::AbstractRNG=MersenneTwister(1234))
    # check sizes before doing any work
    if size(mask) != size(A)
        error("mask and A must have the same size, mask $(size(mask)) A $(size(A))")
    end

    # positions marked in mask; counted with the same `==` test used to find them
    indices = findall(x -> x == markValue, mask)
    available = length(indices)

    remaining = 0
    if n == 0 || n > available
        remaining = max(n - available, 0)   # n == 0 means "all": nothing left over
        n = available
    end

    shuffle!(rng, indices)
    for i in view(indices, 1:n)
        A[i] = a
    end

    return remaining
end
|
||
|
||
""" Return a copy of `A` in which `n` randomly chosen elements equal to `x` are
replaced with `a`.

# Arguments
- `A`: source array (not modified)
- `x`: value to be replaced
- `a`: new value
- `n`: number of elements to replace; `0`, or more than are available, means all
- `rng`: random number generator; the default is a fresh `MersenneTwister(1234)`

Candidates are found and counted with the same `==` test. The previous version
counted with `isequal`, which disagrees with `==` for `0` vs `-0.0` and for NaN.
"""
function replaceElements(A::AbstractArray{<:Any}, x::Number, a::Number, n::Int=0, rng=MersenneTwister(1234))
    B = copy(A)

    # positions holding x; their count is the number available
    indices = findall(==(x), B)
    available = length(indices)
    if n == 0 || n > available
        n = available
    end

    shuffle!(rng, indices)
    for i in view(indices, 1:n)
        B[i] = a
    end

    return B
end
|
||
|
||
|
||
""" Replace `n` randomly chosen elements of `A` that equal `x` with `a`, in place.
Returns `nothing`.

# Arguments
- `A`: array to modify
- `x`: value to be replaced
- `a`: new value
- `n`: number of elements to replace; `0`, or more than are available, means all
- `rng`: random number generator; the default is a fresh `MersenneTwister(1234)`

Candidates are found and counted with the same `==` test. The previous version
counted with `isequal`, which disagrees with `==` for `0` vs `-0.0` and for NaN.
"""
function replaceElements!(A::AbstractArray{<:Any}, x::Number, a::Number, n::Int=0, rng=MersenneTwister(1234))
    # positions holding x; their count is the number available
    indices = findall(==(x), A)
    available = length(indices)
    if n == 0 || n > available
        n = available
    end

    shuffle!(rng, indices)
    for i in view(indices, 1:n)
        A[i] = a
    end

    return nothing
end
|
||
|
||
|
||
|
||
|
||
|
||
""" Get the characters between two specified characters.

# Arguments
- `text::T`
    the text being searched
- `startChar::Char`
    start character; its first occurrence is used
- `endChar::Char`
    end character

# Keyword Arguments
- `endCharLocation::String`
    which `endChar` to use. "next" means the closest one after `startChar`;
    "end" means the last one in `text`.
- `includeChar::Bool`
    whether to include `startChar` and `endChar` in the result. Default is true.

# Return
The characters between the specified characters, as a `String`.

# Throws
- `ErrorException` when `endCharLocation` is neither "end" nor "next"
- `ArgumentError` when `startChar` is absent or no `endChar` follows it

# Example
```jldoctest
julia> getStringBetweenCharacters("a{b}c}", '{', '}')
"{b}"

julia> getStringBetweenCharacters("a{b}c}", '{', '}', endCharLocation="end")
"{b}c}"

julia> getStringBetweenCharacters("a{b}c}", '{', '}', includeChar=false)
"b"
```
"""
function getStringBetweenCharacters(text::T, startChar::Char, endChar::Char;
        endCharLocation::String="next", includeChar::Bool=true)::String where {T<:AbstractString}

    if endCharLocation != "end" && endCharLocation != "next"
        error("endCharLocation must be \"end\" or \"next\"")
    end

    startCharPosition = findfirst(startChar, text)
    startCharPosition === nothing &&
        throw(ArgumentError("startChar '" * string(startChar) * "' not found in text"))

    # nextind, not +1, so multi-byte start characters are handled correctly
    afterStart = nextind(text, startCharPosition)

    endCharPosition = endCharLocation == "end" ? findlast(endChar, text) :
                      findnext(endChar, text, afterStart)
    if endCharPosition === nothing || endCharPosition <= startCharPosition
        throw(ArgumentError("endChar '" * string(endChar) * "' not found after startChar"))
    end

    if includeChar
        return string(text[startCharPosition:endCharPosition])
    else
        # drop the delimiters using character-aware index steps
        return string(text[afterStart:prevind(text, endCharPosition)])
    end
end
|
||
|
||
|
||
|
"""
    JSON3read_stringKey(jsonString::AbstractString) -> OrderedDict{String, Any}

Parse `jsonString` with `JSON3.read` and copy its top-level entries into an `OrderedDict`
whose keys are `String`s (the loop converts each key with `string`).

Arguments:
    jsonString::AbstractString
        JSON text to parse

Return:
    an `OrderedDict{String, Any}` holding one entry per top-level key. Only the top-level
    keys are converted; each value is stored exactly as `JSON3.read` produced it.

Example:
    For JSON text with the keys `wine type` and `intensity level`, the result has the
    String keys "wine type" and "intensity level" mapped to their parsed values.
"""
function JSON3read_stringKey(jsonString::AbstractString)
    # NOTE(review): JSON3 is not listed in this file's visible `using` line (only JSON is);
    # confirm that the name is actually in scope for this module.
    parsed = JSON3.read(jsonString)
    return OrderedDict{String,Any}(string(name) => content for (name, content) in parsed)
end
|
||
|
||
|
"""
    mkDictPath!(dict, addkeys, value)

Store `value` under the nested key path `addkeys` inside `dict`, creating every missing
intermediate dictionary on the way. The same concept as Julia's `mkpath()`.

# Arguments
- `dict::Union{Dict{Symbol, Any}, Dict{String, Any}}`
    target dict, modified in place
- `addkeys::Union{Vector{String}, Vector{Symbol}}`
    key path; every key but the last names a nested dict, the last one receives `value`
- `value`
    value stored at the final key of the path

# Return
- `value` (the result of the final assignment). The updated structure is `dict` itself.

# Throws
- `ArgumentError` if `addkeys` is empty, or if an existing value along the path is not a
    dictionary.
- `ErrorException` if the key type of `addkeys` differs from the key type of `dict`.

# Example
```julia-repl
julia> d = Dict{String, Any}("a" => Dict{String, Any}("b" => 10));

julia> mkDictPath!(d, ["a", "v", "x"], 42)
42

julia> d["a"]["v"]["x"]
42
```
"""
function mkDictPath!(dict::Union{Dict{Symbol, Any}, Dict{String, Any}},
    addkeys::Union{Vector{String}, Vector{Symbol}}, value)

    isempty(addkeys) && throw(ArgumentError("addkeys must contain at least one key"))

    # New keys and existing keys must be the same type. Compare the declared types so the
    # check also protects an empty dict, before anything has been inserted.
    if keytype(dict) != eltype(addkeys)
        error("Type of keys being added is $(eltype(addkeys)) but type of existing keys is $(keytype(dict))")
    end

    node = dict
    for key in @view addkeys[1:end-1]
        if !haskey(node, key)
            # a new level uses the same key type as its parent
            node[key] = Dict{keytype(node), Any}()
        end
        child = node[key]
        if !(child isa AbstractDict)
            throw(ArgumentError("Cannot descend into key $key: existing value is a $(typeof(child)), not a dictionary"))
        end
        node = child
    end

    return node[addkeys[end]] = value
end
|
||
|
||
|
"""
    getDictPath(dict::Dict, keys::Vector)

Look up a value in a nested dictionary by following `keys` one level at a time.

# Arguments
- `dict::Dict`
    dictionary where the lookup starts
- `keys::Vector`
    key path; each key is looked up in the value found for the previous one

# Return
- the value stored under the last key of the path

# Throws
- `ArgumentError` when a key of the path is missing at its level

# Example
```julia-repl
julia> d = Dict{Symbol, Any}(:a => Dict{Symbol, Any}(:b => 10));

julia> getDictPath(d, [:a, :b])
10
```
"""
function getDictPath(dict::Dict, keys::Vector)
    # one lookup step: return the entry for `key` or fail with the shared message
    function descend(node, key)
        if !haskey(node, key)
            throw(ArgumentError("Key $key not found in dictionary"))
        end
        return node[key]
    end

    # walk every key but the last, then resolve the last key in the dict reached so far
    parent = foldl(descend, keys[1:end-1]; init=dict)
    return descend(parent, keys[end])
end
|
||
|
||
|
||
|
"""
    detectKeywordVariation(keywords::AbstractVector, text::String) -> Dict{String, Union{Array, Nothing}}

Run the single-keyword `detectKeywordVariation(keyword, text)` for every entry of
`keywords` and collect the outcome per keyword.

# Arguments
- `keywords::AbstractVector` Vector of keywords to search for
- `text::String` The text to search in

# Returns
- `Dict{String, Union{Array, Nothing}}` mapping each keyword to the array returned by the
    single-keyword method, or to `nothing` when that method reports no match (it returned
    `nothing` or an empty array).

# Examples
Illustrative; the exact matches depend on the single-keyword method.
```julia-repl
julia> detectKeywordVariation(["test", "example", "cat"], "This is a Test EXAMPLE")
Dict{String, Union{Nothing, Array}} with 3 entries:
  "test"    => ["Test"]
  "example" => ["EXAMPLE"]
  "cat"     => nothing
```
"""
function detectKeywordVariation(keywords::T, text::String)::Dict{String, Union{Array, Nothing}} where {T<:AbstractVector}
    kw = Dict{String, Union{Array, Nothing}}()

    for keyword in keywords
        # each keyword is a scalar, so call the single-keyword method directly
        found = detectKeywordVariation(keyword, text)
        # "no match" is normalised to `nothing`, whichever way the callee signals it
        kw[keyword] = (found === nothing || isempty(found)) ? nothing : found
    end

    return kw
end
|
||
|
||
|
"""
    detectKeywordVariation(keyword::String, text::String) -> Union{Nothing, Array{String}}

Collect the case variations of `keyword` that occur in `text`.

The variations tried are `keyword` as given, `uppercasefirst(keyword)`,
`uppercase(keyword)` and `lowercase(keyword)`, each tried once. `text` is split on single
spaces, every piece is stripped of surrounding whitespace and one trailing `.` is removed;
the resulting words are then searched with `findIndex`.

# Arguments:
- `keyword::String` The keyword to search for
- `text::String` The text to search in

# Returns:
- an array holding one entry per hit reported by `findIndex`, so a variation found twice
    is listed twice. The array is empty when nothing is found; as written, this method
    does not return `nothing` even though the declared return type allows it.

# Examples:
Illustrative; assumes `findIndex` reports the positions of words equal to the variation.
```julia-repl
julia> detectKeywordVariation("test", "This is a Test case")
1-element Vector{String}:
 "Test"

julia> detectKeywordVariation("missing", "complete data")
String[]
```
"""
function detectKeywordVariation(keyword::String, text::String)::Union{Nothing, Array{String}}
    # The keyword itself comes first, then its case variations. `unique` removes repeats
    # (e.g. "a" gives "A" from both uppercasefirst and uppercase) so that one occurrence
    # in the text is not counted twice.
    keyword_variations = unique([keyword, uppercasefirst(keyword), uppercase(keyword),
                                 lowercase(keyword)])

    splittext = String[]
    for token in split(text, " ")
        word = strip(token)
        # remove . after a word; `chop` drops one whole character, which stays valid when
        # the preceding character is multi-byte
        if !isempty(word) && last(word) == '.'
            word = chop(word)
        end
        push!(splittext, String(word))
    end

    result = String[]
    for variation in keyword_variations
        r = findIndex(splittext, variation)
        # r[2] is treated as the collection of hits for this variation
        isempty(r[2]) && continue
        # add the variation once per hit so duplicated keywords are detected
        append!(result, fill(variation, length(r[2])))
    end

    return result
end
|
||
|
||
|
""" Convert text into a dictionary with a given keywords. This function use keywords to slice
a given text into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
The left most string which has no keyword will be discarded. WARNING, ordering is important:
the text is consumed from the back, so `detectKeywords` must be listed in the order in
which the keywords appear in `text`.

# Arguments
- `text::String`
    A text to be converted.
- `detectKeywords::Vector{String}`
    A list of keywords used to slice the text. Each keyword is located with
    `detectKeywordVariation`, so the spelling actually found in the text is the one that
    is searched for when slicing.

# Keyword Arguments
- `dictKey::Union{Vector{String}, Nothing}`
    Keys of the resulting dict, one per detected keyword occurrence, in the same order as
    `detectKeywords`. Default is `nothing`, which uses the keywords themselves as keys.
- `symbolkey::Bool`
    If true, resulting dict's key will be Symbols, otherwise string.
- `lowercasekey::Bool`
    set resulting dict's key to be lowercase

# Return
- `d::OrderedDict`
    `OrderedDict{Symbol, Any}` or `OrderedDict{String, Any}` mapping every key to the
    stripped text that follows its keyword, in the order of appearance in `text`.

# Throws
- `ErrorException` if a keyword is not detected in `text`, or can no longer be located
    while slicing.
- `ArgumentError` if `dictKey` is given but its length differs from the number of detected
    keyword occurrences.

# Example
Illustrative; which words are detected depends on `detectKeywordVariation`.
```julia-repl
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"

julia> resultdict = textToDict(text, ["thought:", "plan:", "action:"];
                               dictKey=["thought", "plan", "action"], symbolkey=true)

julia> println(resultdict)
OrderedCollections.OrderedDict{Symbol, Any}(:thought => "what to do",
                                            :plan => "wake up and going out",
                                            :action => "1. wake up 2. eat 3. sleep")
```
"""
function textToDict(text::String, detectKeywords::Vector{String};
    dictKey::Union{Vector{String}, Nothing}=nothing,
    symbolkey::Bool=false, lowercasekey::Bool=false
    )::OrderedDict

    # Detect the exact spelling of every keyword as it occurs in the text
    # (e.g. agent, Agent, AGENT) and remember which keyword each spelling came from.
    matches = String[]
    defaultKeys = String[]
    for keyword in detectKeywords
        detected = detectKeywordVariation(keyword, text)
        # the detector may signal "not found" with `nothing` or with an empty array
        if detected === nothing || isempty(detected)
            error("Keyword $keyword not found in text: $text")
        end
        append!(matches, detected)
        append!(defaultKeys, fill(keyword, length(detected)))
    end

    # without explicit keys, the keywords themselves become the dict keys
    outputKeys = dictKey === nothing ? defaultKeys : dictKey
    if length(outputKeys) != length(matches)
        throw(ArgumentError("dictKey has $(length(outputKeys)) entries but $(length(matches)) keyword occurrences were detected"))
    end

    newdict() = symbolkey ? OrderedDict{Symbol, Any}() : OrderedDict{String, Any}()

    # process text from back to front: everything after the last keyword belongs to it,
    # then the text is cut just before that keyword and the next one is handled
    collected = newdict()
    remainingtext = text
    for i in reverse(eachindex(matches))
        keyword = matches[i]
        span = findlast(keyword, remainingtext)
        if span === nothing
            error("""keyword "$keyword" not found in the provided text: $text </end of error note>""")
        end

        # String indices are byte offsets; nextind/prevind keep the slices valid when the
        # neighbouring characters are multi-byte
        body = remainingtext[nextind(remainingtext, last(span)):end]
        rawkey = lowercasekey ? lowercase(outputKeys[i]) : outputKeys[i]
        key = symbolkey ? Symbol(rawkey) : rawkey
        collected[key] = string(strip(body))  # removes leading and trailing whitespace

        remainingtext = remainingtext[1:prevind(remainingtext, first(span))]
    end

    # entries were gathered back to front; restore the order of appearance
    ordered = newdict()
    for k in reverse(collect(keys(collected)))
        ordered[k] = collected[k]
    end

    return ordered
end
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
end # module |