update
This commit is contained in:
@@ -19,14 +19,15 @@ const TIMEOUT_SECS = 30 # request timeout
|
||||
const ATTEMPTS_PER_CHECK = 1 # number of ping attempts per check
|
||||
const BACKOFF_BETWEEN_ATTEMPTS = 1 # seconds between ping attempts
|
||||
const FAILS_TO_REBOOT = 3 # consecutive failed checks required to trigger reboot
|
||||
const COOLDOWN_AFTER_REBOOT_SECS = 600 # do not reboot again within this many seconds
|
||||
const DRY_RUN = false # set false to actually reboot
|
||||
const COOLDOWN_AFTER_REBOOT_SECS = 120 # do not reboot again within this many seconds
|
||||
const DRY_RUN = true # set false to actually reboot
|
||||
const CHECK_INTERVAL_SECS = 60 # run a check every CHECK_INTERVAL_SECS seconds
|
||||
|
||||
const thisFolderPath = @__DIR__
|
||||
const thisFilePath = @__FILE__
|
||||
const LogFilePath = "$thisFolderPath/check_router_reboot_log.txt" # write logs here and also broadcast
|
||||
const StateFilePath = "$thisFolderPath/check_and_reboot_state.json"
|
||||
|
||||
# println(0)
|
||||
# Simple broadcast helper
|
||||
# Simple broadcast helper (safe Cmd construction)
|
||||
function broadcast_msg(msg::AbstractString)
|
||||
@@ -226,12 +227,12 @@ end
|
||||
|
||||
# Single check iteration
|
||||
function perform_check!(st::State)
|
||||
|
||||
# println(1)
|
||||
success = false
|
||||
last_result = nothing
|
||||
for i in 1:ATTEMPTS_PER_CHECK
|
||||
ok, result = check_router_once(ROUTER_IP)
|
||||
# ok, result = values(JSON.parsefile("test_ping_result.json")) # for testing without actual ping
|
||||
# ok, result = check_router_once(ROUTER_IP)
|
||||
ok, result = values(JSON.parsefile("/home/ton/docker-programs/check_and_reboot/test_ping_result.json")) # for testing without actual ping
|
||||
if ok
|
||||
success = true
|
||||
break
|
||||
@@ -244,13 +245,14 @@ function perform_check!(st::State)
|
||||
in_cooldown = false
|
||||
if st.last_reboot_datetime !== nothing
|
||||
timepass = ((Dates.now() - st.last_reboot_datetime).value / 1000) |> floor |> Int
|
||||
|
||||
if timepass < COOLDOWN_AFTER_REBOOT_SECS
|
||||
in_cooldown = true
|
||||
end
|
||||
end
|
||||
|
||||
# @show in_cooldown
|
||||
# println(2)
|
||||
if in_cooldown
|
||||
# println("2-1")
|
||||
# During cooldown, track failures but don't trigger reboot yet
|
||||
if success
|
||||
broadcast_msg("Broadcasting from file: $thisFilePath")
|
||||
@@ -258,6 +260,7 @@ function perform_check!(st::State)
|
||||
st.consecutive_fails = 0
|
||||
save_state(st, StateFilePath)
|
||||
else
|
||||
# println("2-2")
|
||||
st.consecutive_fails += 1
|
||||
broadcast_msg("Broadcasting from file: $thisFilePath")
|
||||
logmsg("$ROUTER_IP is unreachable during cooldown. Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
|
||||
@@ -265,37 +268,44 @@ function perform_check!(st::State)
|
||||
end
|
||||
return
|
||||
end
|
||||
|
||||
# println(3)
|
||||
# Outside cooldown - full check with potential reboot
|
||||
if success
|
||||
if st.consecutive_fails > 0
|
||||
logmsg("$ROUTER_IP is reachable; resetting consecutive failure counter.")
|
||||
else
|
||||
# logmsg("$ROUTER_IP is reachable.")
|
||||
end
|
||||
st.consecutive_fails = 0
|
||||
save_state(st, StateFilePath)
|
||||
return
|
||||
# println("3-1")
|
||||
if st.consecutive_fails > 0
|
||||
# println("3-2")
|
||||
logmsg("$ROUTER_IP is reachable; resetting consecutive failure counter.")
|
||||
else
|
||||
# logmsg("$ROUTER_IP is reachable.")
|
||||
end
|
||||
st.consecutive_fails = 0
|
||||
save_state(st, StateFilePath)
|
||||
return
|
||||
else
|
||||
st.consecutive_fails += 1
|
||||
broadcast_msg("Broadcasting from file: $thisFilePath")
|
||||
logmsg("$ROUTER_IP is unreachable (last result: $routerresult). Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
|
||||
save_state(st, StateFilePath)
|
||||
st.consecutive_fails += 1
|
||||
broadcast_msg("Broadcasting from file: $thisFilePath")
|
||||
logmsg("$ROUTER_IP is unreachable (last result: $routerresult). Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
|
||||
save_state(st, StateFilePath)
|
||||
end
|
||||
# println(4)
|
||||
if st.consecutive_fails >= FAILS_TO_REBOOT
|
||||
# println("4-1")
|
||||
save_state(st, StateFilePath)
|
||||
ok = do_reboot()
|
||||
if ok
|
||||
# println("4-2")
|
||||
broadcast_msg("Broadcasting from file: $thisFilePath")
|
||||
logmsg("Reboot executed (or simulated). Resetting failure counter.")
|
||||
st.consecutive_fails = 0
|
||||
st.last_reboot_datetime = Dates.now()
|
||||
@show st
|
||||
@show StateFilePath
|
||||
save_state(st, StateFilePath)
|
||||
ok = do_reboot()
|
||||
if ok
|
||||
thisFilePath = @__FILE__
|
||||
broadcast_msg("Broadcasting from file: $thisFilePath")
|
||||
logmsg("Reboot executed (or simulated). Resetting failure counter.")
|
||||
st.consecutive_fails = 0
|
||||
st.last_reboot_datetime = Dates.now()
|
||||
save_state(st, StateFilePath)
|
||||
else
|
||||
logmsg("Reboot attempt failed; will retry after next interval.")
|
||||
end
|
||||
else
|
||||
logmsg("Reboot attempt failed; will retry after next interval.")
|
||||
end
|
||||
end
|
||||
# println(5)
|
||||
end
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user