#!/usr/bin/env julia
# check_and_reboot_loop.jl
# Usage: julia check_and_reboot_loop.jl
# Requires JSON.jl (install with `julia -e 'using Pkg; Pkg.add("JSON")'`)
# ------------------------------------------------------------------------------------------------ #
#                 add the following to root's crontab (sudo crontab -e)
# ------------------------------------------------------------------------------------------------ #
# *** juliar is root's julia (sudo crontab -e); it is symlinked as juliar to keep it separate
#     from the user's julia
# @reboot /usr/local/bin/juliar /home/ton/docker-programs/check_and_reboot/check_router_reboot.jl >> /var/log/check_reboot.log 2>&1
# @reboot sleep 30 && /root/.juliaup/bin/julia /root/docker-apps/check_and_reboot/check_router_reboot.jl >> /var/log/check_reboot.log 2>&1

using Dates, Printf, JSON

# Configuration
const ROUTER_IP = "192.168.88.1"
const TIMEOUT_SECS = 30                 # per-ping reply timeout, in seconds
const ATTEMPTS_PER_CHECK = 1            # number of ping attempts per check
const BACKOFF_BETWEEN_ATTEMPTS = 1      # seconds between ping attempts
const FAILS_TO_REBOOT = 3               # consecutive failed checks required to trigger reboot
const COOLDOWN_AFTER_REBOOT_SECS = 600  # do not reboot again within this many seconds
const DRY_RUN = false                   # false = actually reboot; true = only log the command
const CHECK_INTERVAL_SECS = 60          # run a check every CHECK_INTERVAL_SECS seconds
const thisFolderPath = @__DIR__
const LogFilePath = "$thisFolderPath/check_router_reboot_log.txt"  # write logs here and also broadcast
const StateFilePath = "$thisFolderPath/check_and_reboot_state.json"

# Simple broadcast helper (safe Cmd construction)
"""
    broadcast_msg(msg::AbstractString) -> Bool

Best-effort notification of `msg` to people logged in on this machine.

Linux tries `wall` (message written to its stdin) and falls back to `logger`; macOS uses an
AppleScript notification; Windows uses `msg *`. Returns `true` as soon as one mechanism ran
without throwing and `false` otherwise. Never throws.
"""
function broadcast_msg(msg::AbstractString)
    try
        if Sys.islinux()
            # Try wall if available by writing to its stdin
            for p in ("/usr/bin/wall", "/bin/wall")
                isfile(p) || continue
                try
                    proc = open(`$p`, "w")
                    try
                        write(proc, msg * "\n")
                    finally
                        close(proc)
                    end
                    return true
                catch
                    # ignore and try the next candidate
                end
            end
            # Fallback to logger (argument passed directly, no shell)
            try
                run(Cmd(["logger", msg]))
                return true
            catch
            end
        elseif Sys.isapple()
            # AppleScript notification; double quotes must be escaped inside the script
            try
                escaped = replace(msg, "\"" => "\\\"")
                applescript =
                    "display notification \"" * escaped * "\" with title \"check_and_reboot\""
                run(Cmd(["osascript", "-e", applescript]))
                return true
            catch
            end
        elseif Sys.iswindows()
            # msg to all sessions (may require privileges); best-effort
            try
                run(Cmd(["msg", "*", msg]))
                return true
            catch
            end
        end
    catch
        # swallow any unexpected errors: broadcasting must never break the watchdog
    end
    return false
end

"""
    logmsg(s::AbstractString)

Timestamp `s`, append it to `LogFilePath`, print it to stdout, and broadcast it.

A failure to write the log file is reported once on stdout; the message itself is still
printed and broadcast.
"""
function logmsg(s::AbstractString)
    t = Dates.now()
    out = "[$t] $s"
    try
        open(LogFilePath, "a") do io
            println(io, out)
        end
    catch e
        # Report the failure separately so the message below is not printed twice
        println("[$t] (log write failed: $e)")
    end
    # Also print to stdout for immediate console visibility
    println(out)
    # Best-effort system broadcast so operators on console see it
    try
        broadcast_msg(out)
    catch
        # ignore broadcast failures
    end
    return nothing
end

# State handling
"""
    State

Watchdog state persisted between runs: the number of consecutive failed checks and the time
of the last reboot triggered by this script (`nothing` if there was none).
"""
mutable struct State
    consecutive_fails::Int
    last_reboot_datetime::Union{DateTime, Nothing}
end

"""
    load_state(StateFilePath) -> State

Read the persisted state from the JSON file at `StateFilePath`.

Missing keys and JSON `null` values fall back to `0` / `nothing`. If the file does not exist
or cannot be parsed, a fresh `State(0, nothing)` is returned (a parse problem is logged).
"""
function load_state(StateFilePath)
    isfile(StateFilePath) || return State(0, nothing)
    try
        jsonObj = JSON.parsefile(StateFilePath)
        cf = Int(something(get(jsonObj, "consecutive_fails", 0), 0))
        lr = get(jsonObj, "last_reboot_datetime", nothing)
        # DateTime(nothing) has no method, so an absent/null timestamp stays `nothing`
        return State(cf, lr === nothing ? nothing : DateTime(lr))
    catch e
        logmsg("Warning loading state: $e")
    end
    return State(0, nothing)
end

"""
    save_state(st::State, StateFilePath) -> Bool

Persist `st` as JSON at `StateFilePath`.

The data is written to a temporary sibling file and then renamed over the target, so an
interruption (for example the reboot itself) does not leave a half-written state file.
Returns `true` on success; on failure the error is logged and `false` is returned, so a
read-only disk cannot stop the watchdog from doing its job.
"""
function save_state(st::State, StateFilePath)
    lr = st.last_reboot_datetime
    obj = Dict{String, Any}(
        "consecutive_fails" => st.consecutive_fails,
        # Store the timestamp as an ISO string that `DateTime(::String)` parses back
        "last_reboot_datetime" => lr === nothing ? nothing : string(lr),
    )
    try
        tmp = StateFilePath * ".tmp"
        open(tmp, "w") do io
            write(io, JSON.json(obj))
        end
        mv(tmp, StateFilePath; force=true)
        return true
    catch e
        logmsg("Warning saving state: $e")
        return false
    end
end

# Build the single-packet ping command for this OS, or `nothing` if the OS is unsupported.
function ping_command(ip::AbstractString, timeout)
    millis = round(Int, timeout * 1000)
    if Sys.islinux()
        return `ping -c 1 -W $timeout $ip`     # -W is in seconds on Linux
    elseif Sys.isapple()
        return `ping -c 1 -W $millis $ip`      # -W is in milliseconds on macOS
    elseif Sys.iswindows()
        return `ping -n 1 -w $millis $ip`      # -w is in milliseconds on Windows
    end
    return nothing
end

# Router ping check
"""
    check_router_once(ip::AbstractString; timeout=TIMEOUT_SECS) -> (Bool, String)

Send one ping to `ip`, waiting up to `timeout` seconds for the reply.

Returns `(reachable, detail)`, where `detail` is the ping exit code, `"unsupported OS"`, or
the error text if the ping program could not be started.
"""
function check_router_once(ip::AbstractString; timeout=TIMEOUT_SECS)
    cmd = ping_command(ip, timeout)
    cmd === nothing && return false, "unsupported OS"
    try
        # ignorestatus: a non-zero exit is an expected outcome here, not an exception
        result = run(ignorestatus(cmd))
        return result.exitcode == 0, "exitcode: $(result.exitcode)"
    catch e
        return false, string(e)
    end
end

# Check if running as root
"""
    is_root() -> Bool

Return `true` if `whoami` reports `root`; `false` otherwise or if `whoami` cannot be run.
"""
function is_root()::Bool
    try
        result = read(`whoami`, String)
        return strip(result) == "root"
    catch
        return false
    end
end

# Return the first path in `candidates` that is an existing file, else `fallback`.
function first_existing(candidates, fallback)
    for p in candidates
        isfile(p) && return p
    end
    return fallback
end

# Reboot command selection
"""
    reboot_command() -> Union{Tuple, Nothing}

Return the reboot program followed by its arguments as a tuple of strings, or `nothing` if
the OS is not supported. The result is always a tuple, including the one-element case.
"""
function reboot_command()
    if Sys.iswindows()
        return ("shutdown", "/r", "/t", "0")
    elseif Sys.isapple()
        # Note the trailing comma: ("x") is a String, ("x",) is a 1-tuple
        return is_root() ? ("/sbin/reboot",) : ("/usr/bin/sudo", "shutdown", "-r", "now")
    elseif Sys.islinux()
        systemctl = first_existing(("/bin/systemctl", "/usr/bin/systemctl"), nothing)
        if is_root()
            systemctl === nothing || return (systemctl, "reboot")
            return (first_existing(("/usr/sbin/reboot", "/sbin/reboot"), "/usr/sbin/reboot"),)
        end
        if systemctl !== nothing
            return ("/usr/bin/sudo", "systemctl", "reboot")
        end
        return ("/usr/bin/sudo", "reboot")
    end
    return nothing
end

"""
    do_reboot() -> Bool

Run the command chosen by `reboot_command()`.

With `DRY_RUN` set, the command is only logged and `true` is returned. Returns `false` if the
OS is unsupported or the command could not be executed.
"""
function do_reboot()
    cmd = reboot_command()
    if cmd === nothing
        logmsg("Reboot not supported on this OS")
        return false
    end

    # Build a readable command string for logs (escape double quotes in each argument)
    cmd_str = join((replace(arg, '"' => "\\\"") for arg in cmd), " ")

    if DRY_RUN
        logmsg("DRY RUN: would run reboot command: $cmd_str")
        return true
    end

    logmsg("Executing reboot command: $cmd_str")
    try
        # Construct a Cmd from an array so arguments are passed directly (no shell)
        run(Cmd(collect(String, cmd)))
        return true
    catch e
        logmsg("Failed to execute reboot command: $e")
        return false
    end
end

# Ping the router up to ATTEMPTS_PER_CHECK times; return (reachable, detail of last attempt).
function probe_router()
    last_result = nothing
    for i in 1:ATTEMPTS_PER_CHECK
        ok, result = check_router_once(ROUTER_IP)
        # ok, result = values(JSON.parsefile("test_ping_result.json")) # for testing without actual ping
        last_result = result
        if ok
            return true, result
        end
        # Back off only between attempts, not after the final one
        if i < ATTEMPTS_PER_CHECK
            sleep(BACKOFF_BETWEEN_ATTEMPTS)
        end
    end
    return false, something(last_result, "no response")
end

# True while fewer than COOLDOWN_AFTER_REBOOT_SECS seconds have passed since the last reboot.
function in_reboot_cooldown(st::State)
    last = st.last_reboot_datetime
    last === nothing && return false
    elapsed_secs = fld(Dates.value(Dates.now() - last), 1000)
    return elapsed_secs < COOLDOWN_AFTER_REBOOT_SECS
end

# Reboot the machine and update `st`; returns whether the reboot command ran (or was simulated).
function trigger_reboot!(st::State)
    previous_fails = st.consecutive_fails
    previous_reboot = st.last_reboot_datetime

    # Persist the reboot timestamp BEFORE rebooting: once the command runs, this process may
    # be killed at any moment, and the cooldown must survive into the next boot.
    st.consecutive_fails = 0
    st.last_reboot_datetime = Dates.now()
    save_state(st, StateFilePath)

    if do_reboot()
        thisFilePath = @__FILE__
        broadcast_msg("Broadcasting from file: $thisFilePath")
        logmsg("Reboot executed (or simulated). Resetting failure counter.")
        return true
    end

    # The reboot did not happen: restore the previous state so the next check retries
    st.consecutive_fails = previous_fails
    st.last_reboot_datetime = previous_reboot
    save_state(st, StateFilePath)
    logmsg("Reboot attempt failed; will retry after next interval.")
    return false
end

# Single check iteration
"""
    perform_check!(st::State)

Run one reachability check of `ROUTER_IP` and update and persist `st`.

A successful check resets the failure counter. A failed check increments it; once it reaches
`FAILS_TO_REBOOT` the machine is rebooted, unless the last reboot happened less than
`COOLDOWN_AFTER_REBOOT_SECS` seconds ago, in which case failures are only counted.
"""
function perform_check!(st::State)
    success, routerresult = probe_router()
    in_cooldown = in_reboot_cooldown(st)

    if success
        if in_cooldown
            logmsg("$ROUTER_IP is reachable during cooldown. Router is back online! Resetting state.")
        elseif st.consecutive_fails > 0
            logmsg("$ROUTER_IP is reachable; resetting consecutive failure counter.")
        end
        st.consecutive_fails = 0
        save_state(st, StateFilePath)
        return nothing
    end

    st.consecutive_fails += 1

    if in_cooldown
        # During cooldown failures are tracked but never trigger a reboot; the first failed
        # check after the cooldown ends will see the accumulated counter and reboot.
        logmsg("$ROUTER_IP is unreachable during cooldown. Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
        if st.consecutive_fails >= FAILS_TO_REBOOT
            logmsg("Reboot threshold reached but cooldown is still active; not rebooting yet.")
        end
        save_state(st, StateFilePath)
        return nothing
    end

    logmsg("$ROUTER_IP is unreachable (last result: $routerresult). Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
    save_state(st, StateFilePath)

    if st.consecutive_fails >= FAILS_TO_REBOOT
        trigger_reboot!(st)
    end
    return nothing
end

"""
    limitTextFileLines(LogFilePath::String; maxlines::Integer=100)

Truncate the text file at `LogFilePath` to its last `maxlines` lines.

Returns `nothing` when the file already has at most `maxlines` lines (it is left untouched),
otherwise the number of bytes written.
"""
function limitTextFileLines(LogFilePath::String; maxlines::Integer=100)
    keep = max(Int(maxlines), 0)
    log = readlines(LogFilePath)
    if length(log) <= keep
        return nothing
    end
    kept = log[(end - keep + 1):end]
    return write(LogFilePath, join(line * "\n" for line in kept))
end

# Main loop: runs indefinitely every CHECK_INTERVAL_SECS
"""
    main_loop()

Prepare the log file, load the persisted state, then call `perform_check!` every
`CHECK_INTERVAL_SECS` seconds forever. Errors raised by a check are logged and do not stop
the loop.
"""
function main_loop()
    # Ensure the log file exists and holds at most the latest 100 events. A problem here is
    # reported but must not prevent the watchdog from starting.
    try
        isfile(LogFilePath) || touch(LogFilePath)
        limitTextFileLines(LogFilePath; maxlines=100)
    catch e
        println("Warning: could not prepare log file $LogFilePath: $e")
    end

    thisFilePath = @__FILE__
    broadcast_msg("Broadcasting from file: $thisFilePath")
    logmsg("Starting check loop. Checking router $ROUTER_IP every $(CHECK_INTERVAL_SECS) seconds.")
    st = load_state(StateFilePath)

    while true
        try
            perform_check!(st)
        catch e
            # Let an explicit interrupt end the loop instead of being logged and ignored
            e isa InterruptException && rethrow()
            logmsg("Error during check: $e")
        end
        sleep(CHECK_INTERVAL_SECS)
    end
end

# Run
main_loop()