#!/usr/bin/env julia

# check_and_reboot_loop.jl
# Usage: julia check_and_reboot_loop.jl
# Requires HTTP.jl and JSON.jl (install with `julia -e 'using Pkg; Pkg.add(["HTTP", "JSON"])'`)

# ------------------------------------------------------------------------------------------------ #
#                      add the following to root's crontab (sudo crontab -e)                       #
# ------------------------------------------------------------------------------------------------ #
# *** `juliar` is a symlink to root's julia; it has its own name because I want to keep it
#     separate from the regular user's julia.
# @reboot /usr/local/bin/juliar /home/ton/docker-programs/check_and_reboot/check_yiem_website_reboot.jl >> /var/log/check_reboot.log 2>&1

using Dates, Printf, HTTP, JSON
|
|
|
|
# Configuration

# Site that is probed on every check.
const URL = "https://www.yiem.cc"
# Per-request timeout, in seconds.
const TIMEOUT_SECS = 30
# Number of HTTP attempts made within one check.
const ATTEMPTS_PER_CHECK = 3
# Pause between attempts of the same check, in seconds.
const BACKOFF_BETWEEN_ATTEMPTS = 60
# Consecutive failed checks required to trigger a reboot.
const FAILS_TO_REBOOT = 3
# Do not reboot again within this many seconds of the previous reboot.
const COOLDOWN_AFTER_REBOOT_SECS = 600
# true: only log the reboot command; false: actually reboot.
const DRY_RUN = false
# A check is started every CHECK_INTERVAL_SECS seconds.
const CHECK_INTERVAL_SECS = 60

# Directory containing this script; the log and state files are kept next to it.
const thisFolderPath = @__DIR__
# Log lines are appended here (and also broadcast).
const LogFilePath = string(thisFolderPath, "/check_website_reboot_log.txt")
# Persisted failure counter / last reboot time.
const StateFilePath = string(thisFolderPath, "/check_and_reboot_state.json")
|
|
|
|
# Simple broadcast helper (safe Cmd construction)

# Escape `s` for use inside a double-quoted AppleScript string literal.
# Backslashes are doubled first so that the backslashes added in front of
# double quotes are not themselves escaped a second time.
function _applescript_escape(s::AbstractString)
    return replace(replace(s, "\\" => "\\\\"), "\"" => "\\\"")
end

"""
    broadcast_msg(msg::AbstractString) -> Bool

Best-effort delivery of `msg` through an OS-level notification channel.

- Linux: write `msg` to the stdin of the first `wall` binary found in
  `/usr/bin` or `/bin`; if none can be used, fall back to `logger`.
- macOS: show a notification via `osascript`.
- Windows: `msg *`.

All commands are built from argument vectors, so `msg` never passes through a
shell. Returns `true` as soon as one channel was started without raising and
`false` otherwise. This function never throws: every failure is swallowed on
purpose, because broadcasting must not disturb the caller.
"""
function broadcast_msg(msg::AbstractString)
    try
        if Sys.islinux()
            # Try wall if available by writing to its stdin
            for p in ("/usr/bin/wall", "/bin/wall")
                isfile(p) || continue
                try
                    proc = open(`$p`, "w")
                    try
                        write(proc, msg * "\n")
                    finally
                        close(proc)
                    end
                    return true
                catch
                    # this wall binary could not be used; try the next candidate
                end
            end
            # Fallback to logger (safe arg passing)
            try
                run(Cmd(["logger", msg]))
                return true
            catch
                # logger unavailable or failed; report failure below
            end
        elseif Sys.isapple()
            # AppleScript notification; both backslashes and double quotes must
            # be escaped or the generated script is malformed.
            try
                applescript = "display notification \"" * _applescript_escape(msg) *
                              "\" with title \"check_and_reboot\""
                run(Cmd(["osascript", "-e", applescript]))
                return true
            catch
                # notification failed; report failure below
            end
        elseif Sys.iswindows()
            # Try msg to all sessions (may require privileges); best-effort
            try
                run(Cmd(["msg", "*", msg]))
                return true
            catch
                # msg failed; report failure below
            end
        end
    catch
        # swallow any unexpected errors
    end
    return false
end
|
|
|
|
|
|
|
|
# Simple logging (prints, appends to LogFilePath, and broadcasts)
"""
    logmsg(s::AbstractString)

Timestamp `s` and emit it three ways: append to `LogFilePath`, print to stdout,
and hand it to `broadcast_msg`. A failure to write the log file is reported on
stdout instead of being raised; broadcast failures are ignored.
"""
function logmsg(s::AbstractString)
    stamp = Dates.now()
    line = string("[", stamp, "] ", s)

    # Append to the log file; if that fails, say so on stdout and carry on.
    try
        open(io -> println(io, line), LogFilePath, "a")
    catch err
        println("[$stamp] (log write failed: $err) $s")
    end

    # Console copy for immediate visibility.
    println(line)

    # System broadcast is strictly best-effort.
    try
        broadcast_msg(line)
    catch
        # ignore broadcast failures
    end
end
|
|
|
|
|
|
# State handling
"""
    State(consecutive_fails, last_reboot_datetime)

Mutable watchdog state carried between checks.

- `consecutive_fails`: number of failed checks in a row.
- `last_reboot_datetime`: when a reboot was last triggered, or `nothing` if
  no reboot has been recorded.
"""
mutable struct State
    consecutive_fails::Int
    last_reboot_datetime::Union{DateTime, Nothing}
end
|
|
|
|
"""
    load_state(StateFilePath) -> State

Read the persisted watchdog state from the JSON file at `StateFilePath`.

Missing keys fall back to their defaults (`0` failures, no reboot recorded).
A `last_reboot_datetime` that is absent or JSON `null` yields `nothing`
instead of being passed to `DateTime`, so a state file written before any
reboot happened still restores its failure counter.

If the file does not exist, or reading/parsing it raises, a warning is logged
(in the error case) and a fresh `State(0, nothing)` is returned.

Note: the parameter intentionally has the same name as the global constant;
inside this function it refers to the argument.
"""
function load_state(StateFilePath)
    try
        if isfile(StateFilePath)
            jsonObj = JSON.parsefile(StateFilePath)
            cf = Int(get(jsonObj, "consecutive_fails", 0))
            lr = get(jsonObj, "last_reboot_datetime", nothing)
            # `null` in the file parses to `nothing`; DateTime(nothing) would throw.
            last_reboot = lr === nothing ? nothing : DateTime(lr)
            return State(cf, last_reboot)
        end
    catch e
        logmsg("Warning loading state: $e")
    end
    return State(0, nothing)
end
|
|
|
|
"""
    save_state(st::State, StateFilePath)

Persist `st` as a JSON object with the keys `consecutive_fails` and
`last_reboot_datetime` at `StateFilePath`.

The JSON text is produced with the one-argument `JSON.json(obj)` and written
to a sibling temporary file that is then renamed over the target. The state is
saved right before the machine may be rebooted, so the previous file is kept
intact until the new one has been written completely. Errors from writing or
renaming propagate to the caller.
"""
function save_state(st::State, StateFilePath)
    obj = Dict("consecutive_fails" => st.consecutive_fails,
               "last_reboot_datetime" => st.last_reboot_datetime)
    tmp_path = string(StateFilePath, ".tmp")
    write(tmp_path, JSON.json(obj))
    # Replace the old file in one step instead of truncating it in place.
    mv(tmp_path, StateFilePath; force=true)
    return nothing
end
|
|
|
|
# HTTP check
"""
    check_url_once(url::AbstractString; timeout=TIMEOUT_SECS) -> (ok, status)

Perform a single HTTP GET against `url`.

Returns `(ok, status)`, where `ok` is `true` when the response status is in
`200:399`, and `status` is the numeric status of the response. When no
response was obtained at all (connection error, timeout, ...), returns
`(false, nothing)`.

# Keywords
- `timeout`: seconds used for both the connect timeout and the read timeout.

Implementation notes:
- HTTP.jl names the read timeout keyword `readtimeout`; it is used here so the
  read timeout is really applied.
- `status_exception=false` makes HTTP.jl hand back 4xx/5xx responses instead
  of throwing, so the caller can report the actual status code.
"""
function check_url_once(url::AbstractString; timeout=TIMEOUT_SECS)
    try
        resp = HTTP.request("GET", url;
                            connect_timeout=timeout,
                            readtimeout=timeout,
                            status_exception=false)
        return 200 <= resp.status < 400, resp.status
    catch
        # No usable response: treat as a failed attempt without a status code.
        return false, nothing
    end
end
|
|
|
|
# Reboot command selection
"""
    reboot_command() -> Union{Tuple{Vararg{String}}, Nothing}

Return the reboot command for the current OS as a tuple of plain strings
(program first, then one element per argument), or `nothing` when the OS is
not Windows, macOS or Linux.

- Windows: `shutdown /r /t 0`, invoked directly (no Unix-style path, no shell).
- macOS: `sudo shutdown -r now`.
- Linux: `sudo systemctl reboot` when a `systemctl` binary exists in `/bin`
  or `/usr/bin`, otherwise `sudo reboot`.
"""
function reboot_command()
    if Sys.iswindows()
        return ("shutdown", "/r", "/t", "0")
    elseif Sys.isapple()
        return ("/usr/bin/sudo", "shutdown", "-r", "now")
    elseif Sys.islinux()
        if isfile("/bin/systemctl") || isfile("/usr/bin/systemctl")
            return ("/usr/bin/sudo", "systemctl", "reboot")
        else
            return ("/usr/bin/sudo", "reboot")
        end
    else
        return nothing
    end
end
|
|
|
|
"""
    do_reboot() -> Bool

Run the command returned by `reboot_command()`.

Returns `false` when the OS is unsupported or the command raised, `true` when
the command ran without raising. With `DRY_RUN` set, the command is only
logged and `true` is returned.
"""
function do_reboot()
    parts = reboot_command()
    if isnothing(parts)
        logmsg("Reboot not supported on this OS")
        return false
    end

    # Human-readable rendering for the log only (double quotes escaped).
    printable = join((replace(arg, '"' => "\\\"") for arg in parts), " ")

    if DRY_RUN
        logmsg("DRY RUN: would run reboot command: $printable")
        return true
    end

    logmsg("Executing reboot command: $printable")
    launched = try
        # A Cmd built from a string vector passes arguments directly (no shell).
        run(Cmd(String[parts...]))
        true
    catch err
        logmsg("Failed to execute reboot command: $err")
        false
    end
    return launched
end
|
|
|
|
|
|
# Single check iteration
"""
    perform_check!(st::State)

Run one availability check of `URL` and update `st` in place.

1. If a reboot was recorded less than `COOLDOWN_AFTER_REBOOT_SECS` seconds
   ago, log that and return without probing.
2. Probe `URL` up to `ATTEMPTS_PER_CHECK` times, waiting
   `BACKOFF_BETWEEN_ATTEMPTS` seconds *between* attempts (there is no wait
   after the final attempt).
3. On success, reset the failure counter and save the state.
4. On failure, increment the counter and save the state; once the counter
   reaches `FAILS_TO_REBOOT`, call `do_reboot()`. If that succeeds, reset the
   counter, record the reboot time and save the state again.
"""
function perform_check!(st::State)
    if st.last_reboot_datetime !== nothing
        # Whole seconds elapsed since the recorded reboot.
        timepass = ((Dates.now() - st.last_reboot_datetime).value / 1000) |> floor |> Int

        if timepass < COOLDOWN_AFTER_REBOOT_SECS
            logmsg("In cooldown after recent reboot; skipping check. $timepass/$COOLDOWN_AFTER_REBOOT_SECS seconds")
            return
        end
    end

    success = false
    last_code = nothing
    for i in 1:ATTEMPTS_PER_CHECK
        ok, code = check_url_once(URL)
        last_code = code
        if ok
            success = true
            break
        end
        # Back off only when another attempt follows; sleeping after the last
        # attempt would just delay the verdict.
        if i < ATTEMPTS_PER_CHECK
            sleep(BACKOFF_BETWEEN_ATTEMPTS)
        end
    end

    if success
        # A success with no prior failures is deliberately not logged.
        if st.consecutive_fails > 0
            logmsg("$URL is reachable; resetting consecutive failure counter.")
        end
        st.consecutive_fails = 0
        save_state(st, StateFilePath)
        return
    end

    st.consecutive_fails += 1
    httpresult = isnothing(last_code) ? "no response" : string(last_code)
    logmsg("$URL is unreachable (last HTTP status: $httpresult). Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
    save_state(st, StateFilePath)

    if st.consecutive_fails >= FAILS_TO_REBOOT
        # State was saved just above, so it is already on disk before rebooting.
        ok = do_reboot()
        if ok
            thisFilePath = @__FILE__
            broadcast_msg("Broadcasting from file: $thisFilePath")
            logmsg("Reboot executed (or simulated). Resetting failure counter.")
            st.consecutive_fails = 0
            st.last_reboot_datetime = Dates.now()
            save_state(st, StateFilePath)
        else
            logmsg("Reboot attempt failed; will retry after next interval.")
        end
    end
    return
end
|
|
|
|
|
|
"""
    limitTextFileLines(LogFilePath::AbstractString; maxlines::Integer=100)

Truncate the text file at `LogFilePath` so that it keeps only its last
`maxlines` lines.

Returns `nothing` without touching the file when the file does not exist or
already has at most `maxlines` lines; otherwise rewrites the file (every kept
line terminated by a newline) and returns the number of bytes written.
A non-positive `maxlines` empties a non-empty file.
"""
function limitTextFileLines(LogFilePath::AbstractString; maxlines::Integer=100)
    # Nothing to trim on the very first run, before any log line was written.
    isfile(LogFilePath) || return nothing

    lines = readlines(LogFilePath)
    keep = max(Int(maxlines), 0)
    # `<=` (not `< maxlines - 1`): shorter files must never reach the slice
    # below, where the start index would fall before the first line.
    length(lines) <= keep && return nothing

    kept = @view lines[(end - keep + 1):end]
    return write(LogFilePath, join(string(line, "\n") for line in kept))
end
|
|
|
|
# Main loop: runs indefinitely every CHECK_INTERVAL_SECS
"""
    main_loop()

Entry point of the watchdog.

Trims the log file to its latest 100 lines, announces startup, loads the
persisted state and then loops forever: run `perform_check!`, log any error it
raises, sleep `CHECK_INTERVAL_SECS` seconds, repeat. This function does not
return under normal operation.
"""
function main_loop()
    # Log trimming is housekeeping only; a failure here (missing or unreadable
    # file, ...) must not prevent the watchdog from starting.
    try
        limitTextFileLines(LogFilePath; maxlines=100)
    catch e
        logmsg("Warning: could not trim log file: $e")
    end

    thisFilePath = @__FILE__
    broadcast_msg("Broadcasting from file: $thisFilePath")
    logmsg("Starting check loop. Checking every $(CHECK_INTERVAL_SECS) seconds.")

    st = load_state(StateFilePath)
    while true
        try
            perform_check!(st)
        catch e
            # Keep the loop alive; a single failed iteration is only logged.
            logmsg("Error during check: $e")
        end
        sleep(CHECK_INTERVAL_SECS)
    end
end
|
|
|
|
# Run
|
|
# Script entry point: start the watchdog. main_loop contains an unconditional
# `while true` loop, so this call is not expected to return.
main_loop()
|