This commit is contained in:
2026-03-20 14:48:06 +07:00
parent 0682019085
commit ff7f95e1bb
3 changed files with 132 additions and 124 deletions

View File

@@ -19,14 +19,15 @@ const TIMEOUT_SECS = 30 # request timeout
# Settings for the router check loop; each constant's unit/meaning is given inline.
const ATTEMPTS_PER_CHECK = 1 # number of ping attempts per check
const BACKOFF_BETWEEN_ATTEMPTS = 1 # seconds between ping attempts
const FAILS_TO_REBOOT = 3 # consecutive failed checks required to trigger reboot
# NOTE(review): COOLDOWN_AFTER_REBOOT_SECS and DRY_RUN each appear twice below with
# different values (600 vs 120, false vs true). This looks like the before/after lines
# of a diff shown without +/- markers — confirm which pair is actually in effect.
const COOLDOWN_AFTER_REBOOT_SECS = 600 # do not reboot again within this many seconds
const DRY_RUN = false # set false to actually reboot
const COOLDOWN_AFTER_REBOOT_SECS = 120 # do not reboot again within this many seconds
const DRY_RUN = true # set false to actually reboot
const CHECK_INTERVAL_SECS = 60 # run a check every CHECK_INTERVAL_SECS seconds
# @__DIR__ / @__FILE__ expand to the directory and path of this source file, so the
# log and state files below are placed next to the script itself.
const thisFolderPath = @__DIR__
const thisFilePath = @__FILE__
const LogFilePath = "$thisFolderPath/check_router_reboot_log.txt" # write logs here and also broadcast
# Path handed to save_state(...) whenever the check state is written out.
const StateFilePath = "$thisFolderPath/check_and_reboot_state.json"
# println(0)
# Simple broadcast helper
# Simple broadcast helper (safe Cmd construction)
function broadcast_msg(msg::AbstractString)
@@ -226,12 +227,12 @@ end
# Single check iteration
function perform_check!(st::State)
# println(1)
success = false
last_result = nothing
for i in 1:ATTEMPTS_PER_CHECK
ok, result = check_router_once(ROUTER_IP)
# ok, result = values(JSON.parsefile("test_ping_result.json")) # for testing without actual ping
# ok, result = check_router_once(ROUTER_IP)
ok, result = values(JSON.parsefile("/home/ton/docker-programs/check_and_reboot/test_ping_result.json")) # for testing without actual ping
if ok
success = true
break
@@ -244,13 +245,14 @@ function perform_check!(st::State)
in_cooldown = false
if st.last_reboot_datetime !== nothing
timepass = ((Dates.now() - st.last_reboot_datetime).value / 1000) |> floor |> Int
if timepass < COOLDOWN_AFTER_REBOOT_SECS
in_cooldown = true
end
end
# @show in_cooldown
# println(2)
if in_cooldown
# println("2-1")
# During cooldown, track failures but don't trigger reboot yet
if success
broadcast_msg("Broadcasting from file: $thisFilePath")
@@ -258,6 +260,7 @@ function perform_check!(st::State)
st.consecutive_fails = 0
save_state(st, StateFilePath)
else
# println("2-2")
st.consecutive_fails += 1
broadcast_msg("Broadcasting from file: $thisFilePath")
logmsg("$ROUTER_IP is unreachable during cooldown. Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
@@ -265,37 +268,44 @@ function perform_check!(st::State)
end
return
end
# println(3)
# Outside cooldown - full check with potential reboot
if success
if st.consecutive_fails > 0
logmsg("$ROUTER_IP is reachable; resetting consecutive failure counter.")
else
# logmsg("$ROUTER_IP is reachable.")
end
st.consecutive_fails = 0
save_state(st, StateFilePath)
return
# println("3-1")
if st.consecutive_fails > 0
# println("3-2")
logmsg("$ROUTER_IP is reachable; resetting consecutive failure counter.")
else
# logmsg("$ROUTER_IP is reachable.")
end
st.consecutive_fails = 0
save_state(st, StateFilePath)
return
else
st.consecutive_fails += 1
broadcast_msg("Broadcasting from file: $thisFilePath")
logmsg("$ROUTER_IP is unreachable (last result: $routerresult). Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
save_state(st, StateFilePath)
st.consecutive_fails += 1
broadcast_msg("Broadcasting from file: $thisFilePath")
logmsg("$ROUTER_IP is unreachable (last result: $routerresult). Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
save_state(st, StateFilePath)
end
# println(4)
if st.consecutive_fails >= FAILS_TO_REBOOT
# println("4-1")
save_state(st, StateFilePath)
ok = do_reboot()
if ok
# println("4-2")
broadcast_msg("Broadcasting from file: $thisFilePath")
logmsg("Reboot executed (or simulated). Resetting failure counter.")
st.consecutive_fails = 0
st.last_reboot_datetime = Dates.now()
@show st
@show StateFilePath
save_state(st, StateFilePath)
ok = do_reboot()
if ok
thisFilePath = @__FILE__
broadcast_msg("Broadcasting from file: $thisFilePath")
logmsg("Reboot executed (or simulated). Resetting failure counter.")
st.consecutive_fails = 0
st.last_reboot_datetime = Dates.now()
save_state(st, StateFilePath)
else
logmsg("Reboot attempt failed; will retry after next interval.")
end
else
logmsg("Reboot attempt failed; will retry after next interval.")
end
end
# println(5)
end