Files
check_and_reboot/check_router_reboot.jl
2026-03-20 15:18:07 +07:00

356 lines
11 KiB
Julia

#!/usr/bin/env julia
# check_router_reboot.jl
# Usage: julia check_router_reboot.jl
# Requires JSON.jl (install with `julia -e 'using Pkg; Pkg.add("JSON")'`)
# ------------------------------------------------------------------------------------------------ #
# add the following to root's crontab (sudo crontab -e) #
# ------------------------------------------------------------------------------------------------ #
# ---------------- For PC that has juliar for root user and julia for normal user ---------------- #
# linux_prompt> @reboot sleep 30 && /usr/local/bin/juliar /home/ton/docker-programs/check_and_reboot/check_router_reboot.jl >> /var/log/check_reboot.log 2>&1
# *** juliar is a separate Julia installation for the root user (the one used by `sudo crontab -e`); it is symlinked as `juliar` to keep it distinct from the normal user's julia
# ---------------------------------- For PC that has only julia ---------------------------------- #
# linux_prompt> @reboot sleep 30 && /root/.juliaup/bin/julia /root/docker-apps/check_and_reboot/check_router_reboot.jl >> /var/log/check_reboot.log 2>&1
using Dates, Printf, JSON
# Configuration
# All tunables for the watchdog live here; every function below reads these constants.
# IP address of the router whose reachability is checked
const ROUTER_IP = "192.168.88.1"
# default `timeout` keyword of check_router_once, in seconds (passed to ping)
const TIMEOUT_SECS = 30 # request timeout
const ATTEMPTS_PER_CHECK = 1 # number of ping attempts per check
const BACKOFF_BETWEEN_ATTEMPTS = 1 # seconds between ping attempts
const FAILS_TO_REBOOT = 3 # consecutive failed checks required to trigger reboot
const COOLDOWN_AFTER_REBOOT_SECS = 120 # do not reboot again within this many seconds
# while true, do_reboot only logs the command it would run
const DRY_RUN = true # set false to actually reboot
const CHECK_INTERVAL_SECS = 60 # run a check every CHECK_INTERVAL_SECS seconds
# directory containing this script; the log and state files are placed in it
const thisFolderPath = @__DIR__
# path of this script; included in broadcast messages
const thisFilePath = @__FILE__
const LogFilePath = "$thisFolderPath/check_router_reboot_log.txt" # write logs here and also broadcast
# JSON file read by load_state and written by save_state
const StateFilePath = "$thisFolderPath/check_and_reboot_state.json"
# println(0)
"""
    broadcast_msg(msg::AbstractString)

Best-effort delivery of `msg` to whoever is using this machine.

- Linux: pipe the text into the first `wall` binary found at `/usr/bin/wall` or
  `/bin/wall`; if none works, pass it to `logger`.
- macOS: show an AppleScript notification through `osascript`.
- Windows: send it to all sessions with `msg *`.

Commands are built from argument vectors, so no shell is involved. Returns `true` as
soon as one mechanism succeeds and `false` otherwise; every failure is swallowed.
"""
function broadcast_msg(msg::AbstractString)
    try
        if Sys.islinux()
            # Preferred channel: write the text to the stdin of `wall`.
            for wall_bin in Iterators.filter(isfile, ("/usr/bin/wall", "/bin/wall"))
                try
                    wall_proc = open(`$wall_bin`, "w")
                    try
                        write(wall_proc, msg * "\n")
                    finally
                        close(wall_proc)
                    end
                    return true
                catch
                    # this binary did not work; fall through to the next candidate
                end
            end
            # No usable `wall`: hand the message to `logger` instead.
            try
                run(Cmd(["logger", msg]))
                return true
            catch
            end
        elseif Sys.isapple()
            try
                # Double quotes must be escaped inside the AppleScript string literal.
                quoted = replace(msg, "\"" => "\\\"")
                script = string(
                    "display notification \"", quoted, "\" with title \"check_and_reboot\""
                )
                run(Cmd(["osascript", "-e", script]))
                return true
            catch
            end
        elseif Sys.iswindows()
            # May require privileges; treated as best-effort like the others.
            try
                run(Cmd(["msg", "*", msg]))
                return true
            catch
            end
        end
    catch
        # anything unexpected is ignored on purpose: broadcasting must never break a check
    end
    return false
end
"""
    logmsg(s::AbstractString)

Emit `s` prefixed with the current timestamp to three places: appended to the file
at `LogFilePath`, printed to stdout, and sent through `broadcast_msg`.

A failed file write is reported on stdout instead of being raised; a failed
broadcast is ignored.
"""
function logmsg(s::AbstractString)
    stamp = Dates.now()
    line = "[$stamp] $s"
    # Append to the log file; on failure say so on stdout rather than throwing.
    try
        open(io -> println(io, line), LogFilePath, "a")
    catch err
        println("[$stamp] (log write failed: $err) $s")
    end
    # Console copy for immediate visibility.
    println(line)
    # System-wide broadcast so operators on a console see it too.
    try
        broadcast_msg(line)
    catch
        # broadcast problems are not worth failing for
    end
end
# State handling
# Mutable watchdog state; perform_check! updates it in place and it is persisted
# through save_state / restored through load_state.
mutable struct State
# number of checks in a row that found the router unreachable
consecutive_fails::Int
# time of the last reboot issued (or simulated) by this script; `nothing` if none is recorded
last_reboot_datetime::Union{DateTime, Nothing}
end
"""
    load_state(StateFilePath)

Restore the watchdog `State` from the JSON file at `StateFilePath`.

Returns `State(0, nothing)` when the file does not exist. If the file cannot be read
or parsed, a warning is logged through `logmsg` and `State(0, nothing)` is returned.

The `last_reboot_datetime` entry may be absent or `null` (no reboot recorded yet);
in that case the stored failure counter is kept and the reboot time is `nothing`.
"""
function load_state(StateFilePath)
    try
        if isfile(StateFilePath)
            stored = JSON.parsefile(StateFilePath)
            fails = Int(get(stored, "consecutive_fails", 0))
            raw_reboot = get(stored, "last_reboot_datetime", nothing)
            # A missing key or JSON null means "never rebooted". Calling
            # DateTime(nothing) would throw and throw away the failure counter too.
            last_reboot = raw_reboot === nothing ? nothing : DateTime(raw_reboot)
            return State(fails, last_reboot)
        end
    catch e
        logmsg("Warning loading state: $e")
    end
    return State(0, nothing)
end
"""
    save_state(st::State, StateFilePath)

Persist both fields of `st` under the keys `consecutive_fails` and
`last_reboot_datetime` by handing the path and a `Dict` to `JSON.json`.

NOTE(review): the two-argument `JSON.json(file_name, obj)` form is assumed to write
the serialized object to the named file (JSON.jl 1.x API) -- confirm against the
installed JSON.jl version.
"""
function save_state(st::State, StateFilePath)
    snapshot = Dict(
        "consecutive_fails" => st.consecutive_fails,
        "last_reboot_datetime" => st.last_reboot_datetime,
    )
    return JSON.json(StateFilePath, snapshot)
end
"""
    check_router_once(ip::AbstractString; timeout=TIMEOUT_SECS)

Send a single ping to `ip` and report whether it was answered.

# Keywords
- `timeout`: how long to wait for the reply, in seconds.

# Returns
A tuple `(ok::Bool, detail::String)`. `detail` is `"exitcode: N"` when ping ran,
`"unsupported OS"` on an unknown platform, or the text of the exception when ping
could not be started at all.
"""
function check_router_once(ip::AbstractString; timeout=TIMEOUT_SECS)
    try
        # Windows (-w) and macOS (-W) take the wait time in milliseconds and expect
        # an integer; Linux (-W) takes seconds.
        timeout_ms = round(Int, timeout * 1000)
        cmd = if Sys.islinux()
            `ping -c 1 -W $timeout $ip`
        elseif Sys.isapple()
            `ping -c 1 -W $timeout_ms $ip`
        elseif Sys.iswindows()
            `ping -n 1 -w $timeout_ms $ip`
        else
            nothing
        end
        if cmd === nothing
            return false, "unsupported OS"
        end
        # ignorestatus: a non-zero exit is the normal "host unreachable" answer, not an
        # exception; without it `run` throws and the exit code is never reported.
        proc = run(ignorestatus(cmd))
        return success(proc), "exitcode: $(proc.exitcode)"
    catch e
        # e.g. the ping binary is missing
        return false, string(e)
    end
end
"""
    is_root()::Bool

Return `true` when the `whoami` command reports the user name `root`.
Returns `false` if `whoami` cannot be run.
"""
function is_root()::Bool
    whoami_output = try
        read(`whoami`, String)
    catch
        return false
    end
    # whoami terminates its output with a newline; trim before comparing
    return strip(whoami_output) == "root"
end
"""
    reboot_command()

Choose the command that reboots this machine.

Returns a tuple of strings `(program, args...)` suitable for building a `Cmd`
without a shell, or `nothing` when the operating system is not recognised.
When not running as root on Linux or macOS the command is prefixed with `sudo`.
"""
function reboot_command()
    if Sys.iswindows()
        # Call shutdown directly; there is no /usr/bin/cmd on Windows.
        return ("shutdown", "/r", "/t", "0")
    elseif Sys.isapple()
        if is_root()
            # The trailing comma matters: ("x") is just the String "x", not a tuple.
            return ("/sbin/reboot",)
        else
            return ("/usr/bin/sudo", "shutdown", "-r", "now")
        end
    elseif Sys.islinux()
        # Accept systemctl in either location, for root and non-root alike.
        candidates = ["/bin/systemctl", "/usr/bin/systemctl"]
        found = findfirst(isfile, candidates)
        if is_root()
            if found !== nothing
                return (candidates[found], "reboot")
            else
                return ("/usr/sbin/reboot",)
            end
        else
            if found !== nothing
                return ("/usr/bin/sudo", "systemctl", "reboot")
            else
                return ("/usr/bin/sudo", "reboot")
            end
        end
    else
        return nothing
    end
end
"""
    do_reboot()

Reboot the machine using the command chosen by `reboot_command()`.

When `DRY_RUN` is true the command is only logged. Returns `true` if the command
was run (or simulated) without error, `false` if the OS is unsupported or the
command failed; failures are logged through `logmsg`, not raised.
"""
function do_reboot()
    cmd = reboot_command()
    if cmd === nothing
        logmsg("Reboot not supported on this OS")
        return false
    end
    # Normalise to a vector of strings. A single-program command may arrive as a bare
    # String (a "tuple" written without its trailing comma); iterating that would
    # yield characters and break both the log line and the Cmd construction.
    argv = cmd isa AbstractString ? String[cmd] : String[String(a) for a in cmd]
    # Readable command string for the logs (double quotes escaped)
    cmd_str = join((replace(a, '"' => "\\\"") for a in argv), " ")
    if DRY_RUN
        logmsg("DRY RUN: would run reboot command: $cmd_str")
        return true
    end
    logmsg("Executing reboot command: $cmd_str")
    try
        # Built from an argument vector, so arguments are passed directly (no shell)
        run(Cmd(argv))
        return true
    catch e
        logmsg("Failed to execute reboot command: $e")
        return false
    end
end
# Ping the router up to ATTEMPTS_PER_CHECK times; returns (reachable, detail of last failure or nothing).
function _probe_router()
    last_result = nothing
    for attempt in 1:ATTEMPTS_PER_CHECK
        ok, result = check_router_once(ROUTER_IP)
        if ok
            return true, result
        end
        last_result = result
        # back off only when another attempt follows
        if attempt < ATTEMPTS_PER_CHECK
            sleep(BACKOFF_BETWEEN_ATTEMPTS)
        end
    end
    return false, last_result
end

# True while fewer than COOLDOWN_AFTER_REBOOT_SECS seconds have passed since the recorded reboot.
function _in_reboot_cooldown(st::State)
    st.last_reboot_datetime === nothing && return false
    elapsed_secs = floor(Int, (Dates.now() - st.last_reboot_datetime).value / 1000)
    return elapsed_secs < COOLDOWN_AFTER_REBOOT_SECS
end

"""
    perform_check!(st::State)

Run one watchdog iteration and update `st` in place (persisting it with `save_state`).

1. Ping `ROUTER_IP` (up to `ATTEMPTS_PER_CHECK` attempts).
2. Inside the cooldown window after a reboot: only track the failure counter,
   never reboot.
3. Outside the cooldown: a reachable router resets the counter; an unreachable one
   increments it, and once it reaches `FAILS_TO_REBOOT` a reboot is issued through
   `do_reboot`.

The reboot time is written to the state file before the reboot command runs, because
a real reboot may terminate this process before anything after the command executes;
without that record the cooldown would not apply after the machine comes back up.
If the reboot attempt fails, the previous counter and timestamp are restored so the
next interval retries.
"""
function perform_check!(st::State)
    reachable, last_result = _probe_router()

    if _in_reboot_cooldown(st)
        # During cooldown, track failures but don't trigger a reboot yet
        if reachable
            broadcast_msg("Broadcasting from file: $thisFilePath")
            logmsg("$ROUTER_IP is reachable during cooldown. Router is back online! Resetting state.")
            st.consecutive_fails = 0
        else
            st.consecutive_fails += 1
            broadcast_msg("Broadcasting from file: $thisFilePath")
            logmsg("$ROUTER_IP is unreachable during cooldown. Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
        end
        save_state(st, StateFilePath)
        return
    end

    # Outside cooldown - full check with potential reboot
    if reachable
        if st.consecutive_fails > 0
            logmsg("$ROUTER_IP is reachable; resetting consecutive failure counter.")
        end
        st.consecutive_fails = 0
        save_state(st, StateFilePath)
        return
    end

    routerresult = isnothing(last_result) ? "no response" : last_result
    st.consecutive_fails += 1
    broadcast_msg("Broadcasting from file: $thisFilePath")
    logmsg("$ROUTER_IP is unreachable (last result: $routerresult). Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
    save_state(st, StateFilePath)

    if st.consecutive_fails < FAILS_TO_REBOOT
        return
    end

    # Record the reboot before issuing it (see docstring), remembering the old
    # values so a failed attempt can be rolled back.
    previous_fails = st.consecutive_fails
    previous_reboot = st.last_reboot_datetime
    st.consecutive_fails = 0
    st.last_reboot_datetime = Dates.now()
    save_state(st, StateFilePath)

    if do_reboot()
        broadcast_msg("Broadcasting from file: $thisFilePath")
        logmsg("Reboot executed (or simulated). Resetting failure counter.")
    else
        st.consecutive_fails = previous_fails
        st.last_reboot_datetime = previous_reboot
        save_state(st, StateFilePath)
        logmsg("Reboot attempt failed; will retry after next interval.")
    end
    return
end
"""
    limitTextFileLines(LogFilePath::String; maxlines::Integer=100)

Truncate the text file at `LogFilePath` so that only its last `maxlines` lines remain.

Returns `nothing` (leaving the file untouched) when the file already has `maxlines`
lines or fewer; otherwise rewrites the file and returns the number of bytes written.
A negative `maxlines` is treated as 0. Throws if the file cannot be read or written.
"""
function limitTextFileLines(LogFilePath::String; maxlines::Integer=100)
    keep = max(maxlines, 0)
    lines = readlines(LogFilePath)
    # Nothing to trim. Comparing against `keep` itself (not keep - 1) matters: a file
    # with exactly keep - 1 lines would otherwise be sliced starting at index 0.
    if length(lines) <= keep
        return nothing
    end
    kept = lines[(end - keep + 1):end]
    # Every kept line gets its newline back; an empty selection writes an empty file.
    return write(LogFilePath, join(line * "\n" for line in kept))
end
"""
    main_loop()

Entry point of the watchdog: prepare the log file, load the persisted state, then
call `perform_check!` every `CHECK_INTERVAL_SECS` seconds, indefinitely.

An error raised by a single check is logged and the loop continues; only an
`InterruptException` is allowed to stop it.
"""
function main_loop()
    # Log housekeeping is best-effort, like the log writes in logmsg: an unwritable or
    # unreadable log file must not stop the watchdog before its first check.
    try
        # Ensure the log file exists, then keep only the latest 100 lines
        isfile(LogFilePath) || touch(LogFilePath)
        limitTextFileLines(LogFilePath; maxlines=100)
    catch e
        println("Warning: could not prepare log file $LogFilePath: $e")
    end
    broadcast_msg("Broadcasting from file: $thisFilePath")
    logmsg("Starting check loop. Checking router $ROUTER_IP every $(CHECK_INTERVAL_SECS) seconds.")
    st = load_state(StateFilePath)
    while true
        try
            perform_check!(st)
        catch e
            # let an interrupt end the loop instead of being logged and ignored
            e isa InterruptException && rethrow()
            logmsg("Error during check: $e")
        end
        sleep(CHECK_INTERVAL_SECS)
    end
end
# Run: start the watchdog as soon as the script is executed.
# main_loop() contains an endless `while true` loop, so nothing placed after this call runs.
main_loop()