1st commit
This commit is contained in:
290
check_router_reboot.jl
Normal file
290
check_router_reboot.jl
Normal file
@@ -0,0 +1,290 @@
|
||||
#!/usr/bin/env julia
|
||||
|
||||
# check_router_reboot.jl
|
||||
# Usage: julia check_router_reboot.jl
|
||||
# Requires JSON.jl (install with `julia -e 'using Pkg; Pkg.add("JSON")'`)
|
||||
|
||||
# ------------------------------------------------------------------------------------------------ #
|
||||
# add the following to root's crontab (sudo crontab -e) #
|
||||
# ------------------------------------------------------------------------------------------------ #
|
||||
# *** juliar is root's julia (used from `sudo crontab -e`); it is a symlink named juliar so that it stays separate from the user's julia
|
||||
# @reboot /usr/local/bin/juliar /home/ton/docker-programs/check_and_reboot/check_router_reboot.jl >> /var/log/check_reboot.log 2>&1
|
||||
|
||||
|
||||
using Dates, Printf, JSON
|
||||
|
||||
# Configuration
|
||||
# Address of the router that is pinged to decide whether the network is up.
const ROUTER_IP = "192.168.88.1"

# Timeout, in seconds, used as the default for a single ping.
const TIMEOUT_SECS = 30

# How many pings are tried within one check before the check counts as failed.
const ATTEMPTS_PER_CHECK = 3

# Pause, in seconds, between the pings of one check.
const BACKOFF_BETWEEN_ATTEMPTS = 60

# Number of consecutive failed checks that triggers a reboot.
const FAILS_TO_REBOOT = 3

# After a reboot, no further checks (and hence no reboot) for this many seconds.
const COOLDOWN_AFTER_REBOOT_SECS = 600

# When true the reboot command is only logged; set to false to actually reboot.
const DRY_RUN = true

# Pause, in seconds, between two checks in the main loop.
const CHECK_INTERVAL_SECS = 60

# Log and state files live next to this script.
const thisFolderPath = @__DIR__
const LogFilePath = string(thisFolderPath, "/check_router_reboot_log.txt")
const StateFilePath = string(thisFolderPath, "/check_and_reboot_state.json")
|
||||
|
||||
# Simple broadcast helper
|
||||
# Simple broadcast helper (safe Cmd construction)
|
||||
"""
    broadcast_msg(msg::AbstractString) -> Bool

Best-effort delivery of `msg` through an OS-level notification channel.

- Linux: pipe the message into the first usable `wall` binary, otherwise hand it
  to `logger`.
- macOS: show it with `osascript` as a notification.
- Windows: send it with `msg *`.

Return `true` as soon as one channel accepted the message and `false` otherwise.
Errors are swallowed on purpose; this function does not throw.
"""
function broadcast_msg(msg::AbstractString)
    try
        if Sys.islinux()
            for wallbin in ("/usr/bin/wall", "/bin/wall")
                isfile(wallbin) || continue
                try
                    # wall reads the text to broadcast from its standard input.
                    sink = open(`$wallbin`, "w")
                    try
                        write(sink, msg * "\n")
                    finally
                        close(sink)
                    end
                    return true
                catch
                    # This candidate did not work; try the next one.
                end
            end
            # No wall binary worked: pass the message to logger as a single argument.
            try
                run(Cmd(["logger", msg]))
                return true
            catch
            end
        elseif Sys.isapple()
            try
                # Double quotes must be escaped inside the AppleScript string literal.
                quoted = replace(msg, "\"" => "\\\"")
                script = string("display notification \"", quoted,
                                "\" with title \"check_and_reboot\"")
                run(Cmd(["osascript", "-e", script]))
                return true
            catch
            end
        elseif Sys.iswindows()
            try
                run(Cmd(["msg", "*", msg]))
                return true
            catch
            end
        end
    catch
        # Anything unexpected is ignored: broadcasting is optional.
    end
    return false
end
|
||||
|
||||
|
||||
|
||||
# Simple logging (prints, appends to LogFilePath, and broadcasts)
|
||||
"""
    logmsg(s::AbstractString)

Emit one timestamped log line built from `s` to three places: appended to
`LogFilePath`, printed to stdout, and passed to `broadcast_msg`.

If the file cannot be written, a line mentioning the error is printed to stdout
in addition to the regular one. Broadcast failures are ignored.
"""
function logmsg(s::AbstractString)
    stamp = Dates.now()
    line = "[$stamp] $s"

    # 1) persistent log file (append mode)
    try
        open(io -> println(io, line), LogFilePath, "a")
    catch err
        println("[$stamp] (log write failed: $err) $s")
    end

    # 2) console
    println(line)

    # 3) system-wide broadcast, purely best effort
    try
        broadcast_msg(line)
    catch
    end
end
|
||||
|
||||
|
||||
|
||||
# State handling
|
||||
"""
    State(consecutive_fails, last_reboot_datetime)

Mutable watchdog state that is persisted between runs.

# Fields
- `consecutive_fails::Int`: number of failed checks in a row.
- `last_reboot_datetime`: time of the last reboot this script triggered, or
  `nothing` when no reboot has been recorded.
"""
mutable struct State
    consecutive_fails::Int
    last_reboot_datetime::Union{Nothing, DateTime}
end
|
||||
|
||||
"""
    load_state(StateFilePath) -> State

Read the persisted watchdog state from the JSON file at `StateFilePath`.

Missing keys and JSON `null` values fall back to their defaults
(`consecutive_fails = 0`, `last_reboot_datetime = nothing`). When the file does
not exist, or reading/parsing it fails, a fresh `State(0, nothing)` is returned;
failures are reported through `logmsg`.
"""
function load_state(StateFilePath)
    try
        if isfile(StateFilePath)
            jsonObj = JSON.parsefile(StateFilePath)
            cf = Int(something(get(jsonObj, "consecutive_fails", 0), 0))
            lr = get(jsonObj, "last_reboot_datetime", nothing)
            # A state that never recorded a reboot stores null here. Converting
            # `nothing` with DateTime(...) would throw and discard the failure
            # counter as well, so only parse when a value is present.
            last_reboot = lr === nothing ? nothing : DateTime(lr)
            return State(cf, last_reboot)
        end
    catch e
        logmsg("Warning loading state: $e")
    end
    return State(0, nothing)
end
|
||||
|
||||
"""
    save_state(st::State, StateFilePath)

Persist `st` as a JSON object with the keys `"consecutive_fails"` and
`"last_reboot_datetime"` in the file at `StateFilePath`.

The JSON text is written to a sibling `.tmp` file first and then renamed over
the target, so an interruption mid-write does not leave a truncated state file.
Returns `nothing`; I/O errors propagate to the caller.
"""
function save_state(st::State, StateFilePath)
    obj = Dict("consecutive_fails" => st.consecutive_fails,
               "last_reboot_datetime" => st.last_reboot_datetime)
    # Serialise to a String with the one-argument `JSON.json`, then write it
    # ourselves. NOTE(review): the two-argument `JSON.json(path, obj)` form is
    # believed to be version-dependent in JSON.jl -- confirm against the
    # installed version.
    tmp_path = string(StateFilePath, ".tmp")
    write(tmp_path, JSON.json(obj))
    mv(tmp_path, StateFilePath; force=true)
    return nothing
end
|
||||
|
||||
# Router ping check
|
||||
"""
    check_router_once(ip::AbstractString; timeout=TIMEOUT_SECS) -> (Bool, String)

Send a single ping to `ip` and report whether it succeeded.

# Keywords
- `timeout`: how long to wait for the reply, in seconds.

# Returns
A tuple `(ok, detail)`. `ok` is `true` only when `ping` exited with status 0.
`detail` is `"exitcode: N"` when `ping` ran, `"unsupported OS"` on platforms
other than Linux, macOS and Windows, or the stringified exception when `ping`
could not be started. This function does not throw.
"""
function check_router_once(ip::AbstractString; timeout=TIMEOUT_SECS)
    try
        cmd = if Sys.islinux()
            # Linux ping: -W is the reply timeout in seconds.
            `ping -c 1 -W $timeout $ip`
        elseif Sys.isapple()
            # macOS ping: -W is the reply wait time in MILLISECONDS, so the
            # value has to be converted from seconds.
            `ping -c 1 -W $(round(Int, timeout * 1000)) $ip`
        elseif Sys.iswindows()
            # Windows ping: -w is the timeout in milliseconds (integer).
            `ping -n 1 -w $(round(Int, timeout * 1000)) $ip`
        else
            return false, "unsupported OS"
        end
        # `run` throws on a non-zero exit status; `ignorestatus` lets us read
        # the exit code instead, so failures are reported as "exitcode: N".
        result = run(ignorestatus(cmd))
        return result.exitcode == 0, "exitcode: $(result.exitcode)"
    catch e
        # E.g. the ping executable is missing or cannot be spawned.
        return false, string(e)
    end
end
|
||||
|
||||
# Reboot command selection
|
||||
# Return program and separate args as plain strings
|
||||
"""
    reboot_command()

Return the command that reboots this machine as a tuple of strings (program
first, then its arguments), or `nothing` when the operating system is not one
of Windows, macOS or Linux.

On Linux `systemctl reboot` is preferred when a `systemctl` binary exists at
`/bin/systemctl` or `/usr/bin/systemctl`; otherwise plain `reboot` is used.
"""
function reboot_command()
    if Sys.iswindows()
        # `cmd` is resolved through PATH; the former "/usr/bin/cmd" is a Unix
        # style path that does not exist on Windows.
        return ("cmd", "/C", "shutdown /r /t 0")
    elseif Sys.isapple()
        return ("/usr/bin/sudo", "shutdown", "-r", "now")
    elseif Sys.islinux()
        if isfile("/bin/systemctl") || isfile("/usr/bin/systemctl")
            return ("/usr/bin/sudo", "systemctl", "reboot")
        else
            return ("/usr/bin/sudo", "reboot")
        end
    else
        return nothing
    end
end
|
||||
|
||||
"""
    do_reboot() -> Bool

Run the command returned by `reboot_command()`, or only log it when `DRY_RUN`
is set.

Return `true` when the command ran without raising (or in dry-run mode) and
`false` when the OS is unsupported or running the command failed. Every outcome
is reported through `logmsg`.
"""
function do_reboot()
    parts = reboot_command()
    if isnothing(parts)
        logmsg("Reboot not supported on this OS")
        return false
    end

    # Human-readable rendering for the log only; double quotes are escaped.
    shown = join((replace(part, '"' => "\\\"") for part in parts), " ")

    if DRY_RUN
        logmsg("DRY RUN: would run reboot command: $shown")
        return true
    end

    logmsg("Executing reboot command: $shown")
    try
        # A Cmd built from a vector passes every argument verbatim (no shell).
        run(Cmd(collect(parts)))
        return true
    catch err
        logmsg("Failed to execute reboot command: $err")
        return false
    end
end
|
||||
|
||||
|
||||
# Single check iteration
|
||||
"""
    perform_check!(st::State)

Run one watchdog iteration and update `st` (and the state file) accordingly.

1. While fewer than `COOLDOWN_AFTER_REBOOT_SECS` seconds have passed since
   `st.last_reboot_datetime`, log that and do nothing else.
2. Ping `ROUTER_IP` up to `ATTEMPTS_PER_CHECK` times, pausing
   `BACKOFF_BETWEEN_ATTEMPTS` seconds between attempts.
3. On success reset `st.consecutive_fails`; on failure increment it. The state
   is saved either way.
4. Once `st.consecutive_fails` reaches `FAILS_TO_REBOOT`, call `do_reboot()`.

Returns `nothing`.
"""
function perform_check!(st::State)
    if st.last_reboot_datetime !== nothing
        # Whole seconds elapsed since the last reboot (floored).
        timepass = fld(Dates.value(Dates.now() - st.last_reboot_datetime), 1000)
        if timepass < COOLDOWN_AFTER_REBOOT_SECS
            logmsg("In cooldown after recent reboot; skipping check. $timepass/$COOLDOWN_AFTER_REBOOT_SECS seconds")
            return nothing
        end
    end

    success = false
    last_result = nothing
    for attempt in 1:ATTEMPTS_PER_CHECK
        ok, result = check_router_once(ROUTER_IP)
        last_result = result
        if ok
            success = true
            break
        end
        # Back off only when another attempt follows; sleeping after the final
        # attempt would just delay the verdict.
        attempt < ATTEMPTS_PER_CHECK && sleep(BACKOFF_BETWEEN_ATTEMPTS)
    end

    if success
        # Stay quiet when everything was already fine; only log a recovery.
        if st.consecutive_fails > 0
            logmsg("$ROUTER_IP is reachable; resetting consecutive failure counter.")
        end
        st.consecutive_fails = 0
        save_state(st, StateFilePath)
        return nothing
    end

    st.consecutive_fails += 1
    routerresult = isnothing(last_result) ? "no response" : last_result
    logmsg("$ROUTER_IP is unreachable (last result: $routerresult). Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
    save_state(st, StateFilePath)

    st.consecutive_fails >= FAILS_TO_REBOOT || return nothing

    # Persist the post-reboot state BEFORE issuing the reboot. If the machine
    # goes down before we get another chance to write, the next boot still
    # sees the cooldown timestamp and a cleared counter instead of a counter
    # at the threshold, which could otherwise cause a reboot loop.
    pending = State(0, Dates.now())
    save_state(pending, StateFilePath)

    if do_reboot()
        st.consecutive_fails = pending.consecutive_fails
        st.last_reboot_datetime = pending.last_reboot_datetime
        thisFilePath = @__FILE__
        broadcast_msg("Broadcasting from file: $thisFilePath")
        logmsg("Reboot executed (or simulated). Resetting failure counter.")
    else
        # The reboot did not happen: put the unchanged in-memory state back on
        # disk so the next interval retries.
        save_state(st, StateFilePath)
        logmsg("Reboot attempt failed; will retry after next interval.")
    end
    return nothing
end
|
||||
|
||||
|
||||
"""
    limitTextFileLines(LogFilePath::String; maxlines::Integer=100)

Truncate the text file at `LogFilePath` so that only its last `maxlines` lines
remain.

Return `nothing` when the file does not exist or already has at most `maxlines`
lines (the file is left untouched); otherwise rewrite the file and return the
number of bytes written.

# Throws
- `ArgumentError` if `maxlines` is negative.
"""
function limitTextFileLines(LogFilePath::String; maxlines::Integer=100)
    maxlines >= 0 || throw(ArgumentError("maxlines must be non-negative, got $maxlines"))

    # On the very first run there is no log file yet; nothing to trim.
    isfile(LogFilePath) || return nothing

    lines = readlines(LogFilePath)
    # Guarding with `<=` also keeps the slice below from starting before index 1.
    length(lines) <= maxlines && return nothing

    kept = @view lines[(end - maxlines + 1):end]
    return write(LogFilePath, join(line * "\n" for line in kept))
end
|
||||
|
||||
# Main loop: runs indefinitely every CHECK_INTERVAL_SECS
|
||||
"""
    main_loop()

Entry point of the watchdog. Trims the log file to its latest 100 lines,
announces the start, loads the persisted state and then calls `perform_check!`
every `CHECK_INTERVAL_SECS` seconds, forever.

Errors raised by the log trim or by a single check are logged and do not stop
the loop. This function does not return.
"""
function main_loop()
    # Housekeeping must never prevent the watchdog from starting, so a failure
    # to trim the log is reported and otherwise ignored.
    try
        limitTextFileLines(LogFilePath; maxlines=100)
    catch e
        logmsg("Warning: could not trim log file: $e")
    end

    thisFilePath = @__FILE__
    broadcast_msg("Broadcasting from file: $thisFilePath")
    logmsg("Starting check loop. Checking router $ROUTER_IP every $(CHECK_INTERVAL_SECS) seconds.")

    st = load_state(StateFilePath)
    while true
        try
            perform_check!(st)
        catch e
            logmsg("Error during check: $e")
        end
        sleep(CHECK_INTERVAL_SECS)
    end
end
|
||||
|
||||
# Run
|
||||
main_loop()
|
||||
Reference in New Issue
Block a user