commit 321d04acc7dddf2890f38904640852d39b998f50 Author: narawat Date: Wed Mar 11 17:02:18 2026 +0700 1st commit diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 0000000..9815147 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,222 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.12.1" +manifest_format = "2.0" +project_hash = "324e0aa322681a327b337bcd7a8530af39c981d0" + +[[deps.Artifacts]] +uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.BitFlags]] +git-tree-sha1 = "0691e34b3bb8be9307330f88d1a3c3f25466c24d" +uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" +version = "0.1.9" + +[[deps.CodecZlib]] +deps = ["TranscodingStreams", "Zlib_jll"] +git-tree-sha1 = "962834c22b66e32aa10f7611c08c8ca4e20749a9" +uuid = "944b1d66-785c-5afd-91f1-9de20f533193" +version = "0.7.8" + +[[deps.ConcurrentUtilities]] +deps = ["Serialization", "Sockets"] +git-tree-sha1 = "d9d26935a0bcffc87d2613ce14c527c99fc543fd" +uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" +version = "2.5.0" + +[[deps.Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" + +[[deps.ExceptionUnwrapping]] +deps = ["Test"] +git-tree-sha1 = "d36f682e590a83d63d1c7dbd287573764682d12a" +uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4" +version = "0.1.11" + +[[deps.HTTP]] +deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "PrecompileTools", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] +git-tree-sha1 = "5e6fe50ae7f23d171f44e311c2960294aaa0beb5" +uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" +version = "1.10.19" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.JLLWrappers]] +deps = ["Artifacts", "Preferences"] +git-tree-sha1 = 
"0533e564aae234aff59ab625543145446d8b6ec2" +uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" +version = "1.7.1" + +[[deps.JSON]] +deps = ["Dates", "Logging", "Parsers", "PrecompileTools", "StructUtils", "UUIDs", "Unicode"] +git-tree-sha1 = "eb04df293213df64ddd720c86de3c431f5f8ccf1" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "1.2.1" + + [deps.JSON.extensions] + JSONArrowExt = ["ArrowTypes"] + + [deps.JSON.weakdeps] + ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" + +[[deps.JuliaSyntaxHighlighting]] +deps = ["StyledStrings"] +uuid = "ac6e5ff7-fb65-4e79-a425-ec3bc9c03011" +version = "1.12.0" + +[[deps.Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.LoggingExtras]] +deps = ["Dates", "Logging"] +git-tree-sha1 = "f00544d95982ea270145636c181ceda21c4e2575" +uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" +version = "1.2.0" + +[[deps.Markdown]] +deps = ["Base64", "JuliaSyntaxHighlighting", "StyledStrings"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.MbedTLS]] +deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"] +git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf" +uuid = "739be429-bea8-5141-9913-cc70e7f3736d" +version = "1.1.9" + +[[deps.MbedTLS_jll]] +deps = ["Artifacts", "JLLWrappers", "Libdl"] +git-tree-sha1 = "3cce3511ca2c6f87b19c34ffc623417ed2798cbd" +uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" +version = "2.28.10+0" + +[[deps.MozillaCACerts_jll]] +uuid = "14a3606d-f60d-562e-9121-12d972cd8159" +version = "2025.5.20" + +[[deps.NetworkOptions]] +uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" +version = "1.3.0" + +[[deps.OpenSSL]] +deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "NetworkOptions", "OpenSSL_jll", "Sockets"] +git-tree-sha1 = "386b47442468acfb1add94bf2d85365dea10cbab" +uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" +version = "1.6.0" + 
+[[deps.OpenSSL_jll]] +deps = ["Artifacts", "Libdl"] +uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" +version = "3.5.1+0" + +[[deps.Parsers]] +deps = ["Dates", "PrecompileTools", "UUIDs"] +git-tree-sha1 = "7d2f8f21da5db6a806faf7b9b292296da42b2810" +uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +version = "2.8.3" + +[[deps.PrecompileTools]] +deps = ["Preferences"] +git-tree-sha1 = "07a921781cab75691315adc645096ed5e370cb77" +uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +version = "1.3.3" + +[[deps.Preferences]] +deps = ["TOML"] +git-tree-sha1 = "0f27480397253da18fe2c12a4ba4eb9eb208bf3d" +uuid = "21216c6a-2e73-6563-6e65-726566657250" +version = "1.5.0" + +[[deps.Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" + +[[deps.SimpleBufferStream]] +git-tree-sha1 = "f305871d2f381d21527c770d4788c06c097c9bc1" +uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" +version = "1.2.0" + +[[deps.Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" + +[[deps.StructUtils]] +deps = ["Dates", "UUIDs"] +git-tree-sha1 = "79529b493a44927dd5b13dde1c7ce957c2d049e4" +uuid = "ec057cc2-7a8d-4b58-b3b3-92acb9f63b42" +version = "2.6.0" + + [deps.StructUtils.extensions] + StructUtilsMeasurementsExt = ["Measurements"] + StructUtilsTablesExt = ["Tables"] + + [deps.StructUtils.weakdeps] + Measurements = "eff96d63-e80a-5855-80a2-b1b0885c5ab7" + Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" + +[[deps.StyledStrings]] +uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b" +version = "1.11.0" + +[[deps.TOML]] +deps = ["Dates"] +uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" +version = "1.0.3" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] 
+uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" + +[[deps.TranscodingStreams]] +git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742" +uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" +version = "0.11.3" + +[[deps.URIs]] +git-tree-sha1 = "bef26fb046d031353ef97a82e3fdb6afe7f21b1a" +uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" +version = "1.6.1" + +[[deps.UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" + +[[deps.Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version = "1.11.0" + +[[deps.Zlib_jll]] +deps = ["Libdl"] +uuid = "83775a58-1f1d-513f-b197-d71354ab007a" +version = "1.3.1+2" diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..7978070 --- /dev/null +++ b/Project.toml @@ -0,0 +1,3 @@ +[deps] +HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" +JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" diff --git a/README.md b/README.md new file mode 100644 index 0000000..821137e --- /dev/null +++ b/README.md @@ -0,0 +1,5 @@ + + + +@reboot /usr/local/bin/juliar /home/ton/docker-programs/check_yiem_website_reboot/check_yiem_website_reboot.jl >> /var/log/check_reboot.log 2>&1 +# *** juliar is root's julia (sudo crontab -e) but I symlinked to juliar because I want to seperate it from user's julia \ No newline at end of file diff --git a/check_and_reboot_state.json b/check_and_reboot_state.json new file mode 100644 index 0000000..3d2cdaa --- /dev/null +++ b/check_and_reboot_state.json @@ -0,0 +1 @@ +{"last_reboot_datetime":"2026-01-31T12:56:40.834","consecutive_fails":0} \ No newline at end of file diff --git a/check_router_reboot.jl b/check_router_reboot.jl new file mode 100644 index 0000000..9e606ef --- /dev/null +++ b/check_router_reboot.jl @@ -0,0 +1,290 @@ +#!/usr/bin/env julia + +# check_and_reboot_loop.jl +# Usage: julia check_and_reboot_loop.jl +# Requires HTTP.jl (install with `julia -e 'using Pkg; Pkg.add("HTTP")'`) + +# 
# ------------------------------------------------------------------------------------------------ #
#                      add the following to root's crontab (sudo crontab -e)                       #
# ------------------------------------------------------------------------------------------------ #
# *** juliar is root's julia (sudo crontab -e); it is symlinked as juliar to keep it separate from the user's julia
# @reboot /usr/local/bin/juliar /home/ton/docker-programs/check_and_reboot/check_router_reboot.jl >> /var/log/check_reboot.log 2>&1


using Dates, Printf, JSON

# Configuration
const ROUTER_IP = "192.168.88.1"
const TIMEOUT_SECS = 30                 # per-ping reply timeout, in seconds
const ATTEMPTS_PER_CHECK = 3            # number of ping attempts per check
const BACKOFF_BETWEEN_ATTEMPTS = 60     # seconds between attempts
const FAILS_TO_REBOOT = 3               # consecutive failed checks required to trigger reboot
const COOLDOWN_AFTER_REBOOT_SECS = 600  # do not reboot again within this many seconds
const DRY_RUN = true                    # set false to actually reboot
const CHECK_INTERVAL_SECS = 60          # run a check every CHECK_INTERVAL_SECS seconds

const thisFolderPath = @__DIR__
const LogFilePath = "$thisFolderPath/check_router_reboot_log.txt"  # write logs here and also broadcast
const StateFilePath = "$thisFolderPath/check_and_reboot_state.json"

"""
    broadcast_msg(msg) -> Bool

Best-effort notification of logged-in users. On Linux the message is piped to
`wall` (falling back to `logger`), on macOS it is shown through `osascript`, and
on Windows it is sent with `msg *`. Every command is built from an argument
vector, so `msg` is never interpreted by a shell.

Returns `true` as soon as one mechanism runs without throwing, `false` otherwise.
Never throws.
"""
function broadcast_msg(msg::AbstractString)
    try
        if Sys.islinux()
            # Try wall if available by writing to its stdin
            for p in ("/usr/bin/wall", "/bin/wall")
                isfile(p) || continue
                try
                    proc = open(`$p`, "w")
                    try
                        write(proc, msg * "\n")
                    finally
                        close(proc)
                    end
                    return true
                catch
                    # this wall binary failed; try the next candidate
                end
            end
            # Fallback to logger (safe arg passing)
            try
                run(Cmd(["logger", msg]))
                return true
            catch
                # best-effort: fall through and report failure
            end
        elseif Sys.isapple()
            # Use an AppleScript notification (escape double quotes)
            try
                escaped = replace(msg, "\"" => "\\\"")
                applescript = "display notification \"" * escaped * "\" with title \"check_and_reboot\""
                run(Cmd(["osascript", "-e", applescript]))
                return true
            catch
                # best-effort: fall through and report failure
            end
        elseif Sys.iswindows()
            # Try msg to all sessions (may require privileges); best-effort
            try
                run(Cmd(["msg", "*", msg]))
                return true
            catch
                # best-effort: fall through and report failure
            end
        end
    catch
        # swallow any unexpected errors: broadcasting must never break the watchdog
    end
    return false
end


"""
    logmsg(s)

Timestamp `s`, append it to `LogFilePath`, print it to stdout and broadcast it
with `broadcast_msg`. A failure to write the log file is reported on stdout
instead of being raised.
"""
function logmsg(s::AbstractString)
    t = Dates.now()
    out = "[$t] $s"
    # write to LogFilePath (append)
    try
        open(LogFilePath, "a") do io
            println(io, out)
        end
    catch e
        # If the log file cannot be written, report that on stdout
        println("[$t] (log write failed: $e) $s")
    end
    # Also print to stdout for immediate console visibility
    println(out)
    # Best-effort system broadcast so operators on the console see it
    try
        broadcast_msg(out)
    catch
        # ignore broadcast failures
    end
end


# State handling
"""
Watchdog state kept in memory and persisted to `StateFilePath`.

- `consecutive_fails`: number of failed checks in a row.
- `last_reboot_datetime`: time of the last reboot triggered by this script, or
  `nothing` if none is recorded.
"""
mutable struct State
    consecutive_fails::Int
    last_reboot_datetime::Union{DateTime, Nothing}
end

"""
    load_state(StateFilePath) -> State

Read the persisted state from the JSON file at `StateFilePath`.

Missing keys fall back to `0` fails / no recorded reboot. A missing file, or any
error while reading or parsing it, yields `State(0, nothing)`; errors are logged
as a warning rather than raised.
"""
function load_state(StateFilePath)
    try
        if isfile(StateFilePath)
            jsonObj = JSON.parsefile(StateFilePath)
            cf = haskey(jsonObj, "consecutive_fails") ? Int(jsonObj["consecutive_fails"]) : 0
            lr = haskey(jsonObj, "last_reboot_datetime") ? jsonObj["last_reboot_datetime"] : nothing
            # Only parse the timestamp when one is present: DateTime(nothing) throws,
            # which used to discard the saved failure counter as well.
            last_reboot = lr === nothing ? nothing : DateTime(lr)
            return State(cf, last_reboot)
        end
    catch e
        logmsg("Warning loading state: $e")
    end
    return State(0, nothing)
end

"""
    save_state(st, StateFilePath)

Serialize `st` as a JSON object with the keys `consecutive_fails` and
`last_reboot_datetime` and write it to `StateFilePath`.
Errors raised while writing (e.g. permission denied) propagate to the caller.
"""
function save_state(st::State, StateFilePath)
    obj = Dict("consecutive_fails" => st.consecutive_fails,
               "last_reboot_datetime" => st.last_reboot_datetime)
    JSON.json(StateFilePath, obj)
end

"""
    check_router_once(ip; timeout=TIMEOUT_SECS) -> (ok::Bool, detail::String)

Send a single ping to `ip`, waiting up to `timeout` seconds for a reply.

`ok` is `true` when ping exits with status 0. `detail` is `"exitcode: N"`,
`"unsupported OS"`, or the text of the exception if ping could not be started.
"""
function check_router_once(ip::AbstractString; timeout=TIMEOUT_SECS)
    try
        timeout_ms = round(Int, timeout * 1000)
        cmd = if Sys.islinux()
            # iputils ping: -W is the reply timeout in seconds
            `ping -c 1 -W $timeout $ip`
        elseif Sys.isapple()
            # BSD/macOS ping: -W is the wait time in MILLISECONDS
            `ping -c 1 -W $timeout_ms $ip`
        elseif Sys.iswindows()
            # Windows ping: -w is the timeout in milliseconds
            `ping -n 1 -w $timeout_ms $ip`
        else
            return false, "unsupported OS"
        end
        # ignorestatus: a non-zero exit is an expected outcome here, not an exception
        proc = run(ignorestatus(cmd))
        return proc.exitcode == 0, "exitcode: $(proc.exitcode)"
    catch e
        return false, string(e)
    end
end

"""
    reboot_command() -> Union{Tuple, Nothing}

Return the reboot command for the current OS as a tuple of program and
arguments (plain strings), or `nothing` when the OS is not supported.
"""
function reboot_command()
    if Sys.iswindows()
        # Call shutdown directly; there is no /usr/bin/cmd on Windows
        return ("shutdown", "/r", "/t", "0")
    elseif Sys.isapple()
        return ("/usr/bin/sudo", "shutdown", "-r", "now")
    elseif Sys.islinux()
        if isfile("/bin/systemctl") || isfile("/usr/bin/systemctl")
            return ("/usr/bin/sudo", "systemctl", "reboot")
        else
            return ("/usr/bin/sudo", "reboot")
        end
    else
        return nothing
    end
end

"""
    do_reboot() -> Bool

Run the command returned by `reboot_command()`, or only log it when `DRY_RUN`
is `true`. Returns `true` if the command ran (or was simulated), `false` if the
OS is unsupported or the command failed.
"""
function do_reboot()
    cmd = reboot_command()
    if cmd === nothing
        logmsg("Reboot not supported on this OS")
        return false
    end

    # Build a readable command string for logs (escape each arg safely)
    cmd_str = join(map(x -> replace(x, '"' => "\\\""), cmd), " ")

    if DRY_RUN
        logmsg("DRY RUN: would run reboot command: $cmd_str")
        return true
    end

    logmsg("Executing reboot command: $cmd_str")
    try
        # Construct a Cmd from an array so arguments are passed directly (no shell)
        cmd_array = collect(cmd)  # Tuple{String,...} -> Vector{String}
        run(Cmd(cmd_array))
        return true
    catch e
        logmsg("Failed to execute reboot command: $e")
        return false
    end
end


"""
    perform_check!(st)

Run one watchdog iteration, updating and persisting `st`:

1. Skip everything while still inside the cooldown window after a reboot.
2. Ping `ROUTER_IP` up to `ATTEMPTS_PER_CHECK` times, pausing
   `BACKOFF_BETWEEN_ATTEMPTS` seconds between attempts.
3. On success reset the failure counter; on failure increment it and, once it
   reaches `FAILS_TO_REBOOT`, trigger `do_reboot()`.
"""
function perform_check!(st::State)
    if st.last_reboot_datetime !== nothing
        # whole seconds elapsed since the last recorded reboot
        timepass = fld(Dates.value(Dates.now() - st.last_reboot_datetime), 1000)

        if timepass < COOLDOWN_AFTER_REBOOT_SECS
            logmsg("In cooldown after recent reboot; skipping check. $timepass/$COOLDOWN_AFTER_REBOOT_SECS seconds")
            return
        end
    end

    success = false
    last_result = nothing
    for i in 1:ATTEMPTS_PER_CHECK
        ok, result = check_router_once(ROUTER_IP)
        last_result = result
        if ok
            success = true
            break
        end
        # Back off only BETWEEN attempts; waiting after the last one just delays the verdict
        i < ATTEMPTS_PER_CHECK && sleep(BACKOFF_BETWEEN_ATTEMPTS)
    end

    if success
        if st.consecutive_fails > 0
            logmsg("$ROUTER_IP is reachable; resetting consecutive failure counter.")
        end
        st.consecutive_fails = 0
        save_state(st, StateFilePath)
        return
    end

    st.consecutive_fails += 1
    routerresult = isnothing(last_result) ? "no response" : last_result
    logmsg("$ROUTER_IP is unreachable (last result: $routerresult). Consecutive fails: $(st.consecutive_fails)/$FAILS_TO_REBOOT.")
    save_state(st, StateFilePath)

    if st.consecutive_fails >= FAILS_TO_REBOOT
        if do_reboot()
            thisFilePath = @__FILE__
            broadcast_msg("Broadcasting from file: $thisFilePath")
            logmsg("Reboot executed (or simulated). Resetting failure counter.")
            st.consecutive_fails = 0
            st.last_reboot_datetime = Dates.now()
            save_state(st, StateFilePath)
        else
            logmsg("Reboot attempt failed; will retry after next interval.")
        end
    end
end


"""
    limitTextFileLines(LogFilePath; maxlines=100)

Trim the text file at `LogFilePath` so that it keeps only its last `maxlines`
lines. Returns `nothing` when the file does not exist or already has at most
`maxlines` lines; otherwise rewrites the file and returns the number of bytes
written.
"""
function limitTextFileLines(LogFilePath::String; maxlines::Integer=100)
    # Nothing to trim on first start, before any log line has been written
    isfile(LogFilePath) || return nothing
    lines = readlines(LogFilePath)
    keep = max(maxlines, 0)
    # `<=` (not `< maxlines - 1`): a file holding exactly maxlines-1 lines used to
    # index element 0 below and throw a BoundsError
    length(lines) <= keep && return nothing
    tail = lines[end-keep+1:end]
    return write(LogFilePath, join(line * "\n" for line in tail))
end

"""
    main_loop()

Entry point: trim the log file, announce start-up, load the persisted state and
then call `perform_check!` every `CHECK_INTERVAL_SECS` seconds forever. Errors
raised by a check are logged and do not stop the loop.
"""
function main_loop()
    # limit log file to latest 100 events; a trimming problem must not stop the watchdog
    try
        limitTextFileLines(LogFilePath; maxlines=100)
    catch e
        println("Warning: could not trim log file: $e")
    end
    thisFilePath = @__FILE__
    broadcast_msg("Broadcasting from file: $thisFilePath")
    logmsg("Starting check loop. Checking router $ROUTER_IP every $(CHECK_INTERVAL_SECS) seconds.")
    st = load_state(StateFilePath)
    while true
        try
            perform_check!(st)
        catch e
            logmsg("Error during check: $e")
        end
        sleep(CHECK_INTERVAL_SECS)
    end
end

# Run
main_loop()
+[2026-03-11T15:49:03.985] Error during check: SystemError("opening file \"/home/ton/docker-programs/check_and_reboot/check_and_reboot_state.json\"", 13, nothing) +[2026-03-11T15:50:04.059] Error during check: SystemError("opening file \"/home/ton/docker-programs/check_and_reboot/check_and_reboot_state.json\"", 13, nothing) +[2026-03-11T15:51:04.131] Error during check: SystemError("opening file \"/home/ton/docker-programs/check_and_reboot/check_and_reboot_state.json\"", 13, nothing) +[2026-03-11T15:52:04.197] Error during check: SystemError("opening file \"/home/ton/docker-programs/check_and_reboot/check_and_reboot_state.json\"", 13, nothing) +[2026-03-11T15:53:04.266] Error during check: SystemError("opening file \"/home/ton/docker-programs/check_and_reboot/check_and_reboot_state.json\"", 13, nothing) +[2026-03-11T16:10:04.474] Starting check loop. Checking router 192.168.88.1 every 60 seconds. +[2026-03-11T16:10:05.449] Error during check: SystemError("opening file \"/home/ton/docker-programs/check_and_reboot/check_and_reboot_state.json\"", 13, nothing) +[2026-03-11T16:12:13.704] Starting check loop. Checking router 192.168.88.1 every 60 seconds. +[2026-03-11T16:12:14.671] Error during check: SystemError("opening file \"/home/ton/docker-programs/check_and_reboot/check_and_reboot_state.json\"", 13, nothing) +[2026-03-11T16:13:14.215] Starting check loop. Checking router 192.168.88.1 every 60 seconds. +[2026-03-11T16:13:15.192] Error during check: SystemError("opening file \"/home/ton/docker-programs/check_and_reboot/check_and_reboot_state.json\"", 13, nothing) +[2026-03-11T16:14:19.623] Starting check loop. Checking router 192.168.88.1 every 60 seconds. +[2026-03-11T16:14:20.610] Error during check: SystemError("opening file \"/home/ton/docker-programs/check_and_reboot/check_and_reboot_state.json\"", 13, nothing) +[2026-03-11T16:14:54.601] Starting check loop. Checking router 192.168.88.1 every 60 seconds. 
+[2026-03-11T16:14:55.573] Error during check: SystemError("opening file \"/home/ton/docker-programs/check_and_reboot/check_and_reboot_state.json\"", 13, nothing) +[2026-03-11T16:15:48.563] Starting check loop. Checking router 192.168.88.1 every 60 seconds. +[2026-03-11T16:15:49.540] Error during check: SystemError("opening file \"/home/ton/docker-programs/check_and_reboot/check_and_reboot_state.json\"", 13, nothing) +[2026-03-11T16:27:47.181] Starting check loop. Checking router 192.168.88.1 every 60 seconds. +[2026-03-11T16:30:28.572] Starting check loop. Checking router 192.168.88.1 every 60 seconds. +[2026-03-11T16:43:50.213] Starting check loop. Checking router 192.168.88.1 every 60 seconds. diff --git a/check_website_reboot_log.txt b/check_website_reboot_log.txt new file mode 100644 index 0000000..34e80e3 --- /dev/null +++ b/check_website_reboot_log.txt @@ -0,0 +1,106 @@ +[2026-01-17T07:42:53.348] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 2/3. +[2026-01-17T07:45:15.603] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 3/3. +[2026-01-17T07:45:15.711] Executing reboot command: /usr/bin/sudo systemctl reboot +[2026-01-17T07:45:15.844] Reboot executed (or simulated). Resetting failure counter. +[2026-01-17T07:46:19.663] Starting check loop. Checking every 60 seconds. +[2026-01-17T07:46:20.724] In cooldown after recent reboot; skipping check. 64/600 seconds +[2026-01-17T07:47:20.781] In cooldown after recent reboot; skipping check. 124/600 seconds +[2026-01-17T07:48:20.845] In cooldown after recent reboot; skipping check. 185/600 seconds +[2026-01-17T07:49:20.909] In cooldown after recent reboot; skipping check. 245/600 seconds +[2026-01-17T07:58:24.718] Starting check loop. Checking every 60 seconds. +[2026-01-17T08:02:44.083] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. 
+[2026-01-17T08:03:46.093] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-17T08:11:50.378] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-17T08:13:03.108] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-18T02:02:57.693] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-18T02:04:15.005] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-18T03:01:07.757] Starting check loop. Checking every 60 seconds. +[2026-01-19T02:02:26.818] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-19T02:03:57.685] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-19T03:01:06.405] Starting check loop. Checking every 60 seconds. +[2026-01-20T02:03:01.083] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-20T02:04:20.344] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-20T03:01:03.911] Starting check loop. Checking every 60 seconds. +[2026-01-21T02:02:55.759] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-21T02:04:13.646] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-21T03:01:02.548] Starting check loop. Checking every 60 seconds. +[2026-01-22T02:03:04.272] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-22T02:04:25.563] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-22T03:01:06.490] Starting check loop. Checking every 60 seconds. +[2026-01-23T02:03:16.047] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. 
+[2026-01-23T02:04:27.550] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-23T03:01:05.579] Starting check loop. Checking every 60 seconds. +[2026-01-24T02:03:08.185] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-24T02:04:27.249] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-24T03:01:07.323] Starting check loop. Checking every 60 seconds. +[2026-01-25T02:02:53.191] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-25T02:04:13.325] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-25T03:01:04.006] Starting check loop. Checking every 60 seconds. +[2026-01-26T02:02:44.564] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-26T02:04:04.361] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-26T03:01:04.990] Starting check loop. Checking every 60 seconds. +[2026-01-26T22:41:55.335] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-26T22:43:32.534] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-26T23:21:13.286] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-26T23:22:26.041] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-27T02:02:57.657] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-27T02:04:16.197] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-27T03:01:04.404] Starting check loop. Checking every 60 seconds. +[2026-01-28T02:02:24.630] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. 
+[2026-01-28T02:03:48.679] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-28T03:01:04.253] Starting check loop. Checking every 60 seconds. +[2026-01-29T02:02:20.126] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-29T02:03:41.927] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-29T03:02:31.268] Starting check loop. Checking every 60 seconds. +[2026-01-30T02:02:39.702] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-30T02:04:01.710] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-30T03:01:04.653] Starting check loop. Checking every 60 seconds. +[2026-01-31T02:02:49.386] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-31T02:04:06.540] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-31T03:01:04.808] Starting check loop. Checking every 60 seconds. +[2026-01-31T12:08:43.831] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-31T12:10:00.803] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-31T12:15:11.347] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-31T12:16:53.757] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 2/3. +[2026-01-31T12:18:46.546] https://www.yiem.cc is reachable; resetting consecutive failure counter. +[2026-01-31T12:25:07.824] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-31T12:26:45.831] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 2/3. +[2026-01-31T12:28:23.274] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 3/3. 
+[2026-01-31T12:28:23.341] Executing reboot command: /usr/bin/sudo systemctl reboot +[2026-01-31T12:28:23.408] Reboot executed (or simulated). Resetting failure counter. +[2026-01-31T12:29:28.689] Starting check loop. Checking every 60 seconds. +[2026-01-31T12:29:29.768] In cooldown after recent reboot; skipping check. 66/600 seconds +[2026-01-31T12:30:29.822] In cooldown after recent reboot; skipping check. 126/600 seconds +[2026-01-31T12:31:29.886] In cooldown after recent reboot; skipping check. 186/600 seconds +[2026-01-31T12:32:29.891] In cooldown after recent reboot; skipping check. 246/600 seconds +[2026-01-31T12:33:29.911] In cooldown after recent reboot; skipping check. 306/600 seconds +[2026-01-31T12:34:29.927] In cooldown after recent reboot; skipping check. 366/600 seconds +[2026-01-31T12:35:29.977] In cooldown after recent reboot; skipping check. 426/600 seconds +[2026-01-31T12:36:29.991] In cooldown after recent reboot; skipping check. 486/600 seconds +[2026-01-31T12:37:30.011] In cooldown after recent reboot; skipping check. 546/600 seconds +[2026-01-31T12:39:16.611] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-31T12:40:54.802] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 2/3. +[2026-01-31T12:42:32.584] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 3/3. +[2026-01-31T12:42:32.698] Executing reboot command: /usr/bin/sudo systemctl reboot +[2026-01-31T12:42:32.810] Reboot executed (or simulated). Resetting failure counter. +[2026-01-31T12:43:36.151] Starting check loop. Checking every 60 seconds. +[2026-01-31T12:43:37.230] In cooldown after recent reboot; skipping check. 64/600 seconds +[2026-01-31T12:44:37.294] In cooldown after recent reboot; skipping check. 124/600 seconds +[2026-01-31T12:45:37.313] In cooldown after recent reboot; skipping check. 
184/600 seconds +[2026-01-31T12:46:37.373] In cooldown after recent reboot; skipping check. 244/600 seconds +[2026-01-31T12:47:37.425] In cooldown after recent reboot; skipping check. 304/600 seconds +[2026-01-31T12:48:37.463] In cooldown after recent reboot; skipping check. 364/600 seconds +[2026-01-31T12:49:37.525] In cooldown after recent reboot; skipping check. 424/600 seconds +[2026-01-31T12:50:37.589] In cooldown after recent reboot; skipping check. 484/600 seconds +[2026-01-31T12:51:37.613] In cooldown after recent reboot; skipping check. 544/600 seconds +[2026-01-31T12:53:24.216] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 1/3. +[2026-01-31T12:55:02.985] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 2/3. +[2026-01-31T12:56:40.620] https://www.yiem.cc is unreachable (last HTTP status: no response). Consecutive fails: 3/3. +[2026-01-31T12:56:40.724] Executing reboot command: /usr/bin/sudo systemctl reboot +[2026-01-31T12:56:40.832] Reboot executed (or simulated). Resetting failure counter. +[2026-01-31T12:57:42.134] Starting check loop. Checking every 60 seconds. +[2026-01-31T12:57:43.218] In cooldown after recent reboot; skipping check. 62/600 seconds +[2026-01-31T12:58:43.279] In cooldown after recent reboot; skipping check. 122/600 seconds +[2026-01-31T12:59:43.319] In cooldown after recent reboot; skipping check. 182/600 seconds +[2026-01-31T13:00:43.383] In cooldown after recent reboot; skipping check. 242/600 seconds +[2026-01-31T13:01:43.447] In cooldown after recent reboot; skipping check. 
302/600 seconds diff --git a/check_yiem_website_reboot.jl b/check_yiem_website_reboot.jl new file mode 100644 index 0000000..e58df27 --- /dev/null +++ b/check_yiem_website_reboot.jl @@ -0,0 +1,277 @@ +#!/usr/bin/env julia + +# check_and_reboot_loop.jl +# Usage: julia check_and_reboot_loop.jl +# Requires HTTP.jl (install with `julia -e 'using Pkg; Pkg.add("HTTP")'`) + +# ------------------------------------------------------------------------------------------------ # +# add the following to root's crontab (sudo crontab -e) # +# ------------------------------------------------------------------------------------------------ # +# *** juliar is root's julia (sudo crontab -e) but I symlinked to juliar because I want to seperate it from user's julia +# @reboot /usr/local/bin/juliar /home/ton/docker-programs/check_and_reboot/check_yiem_website_reboot.jl >> /var/log/check_reboot.log 2>&1 + + + +using Dates, Printf, HTTP, JSON + +# Configuration +const URL = "https://www.yiem.cc" +const TIMEOUT_SECS = 30 # request timeout +const ATTEMPTS_PER_CHECK = 3 # number of HTTP attempts per check +const BACKOFF_BETWEEN_ATTEMPTS = 60 # seconds between attempts +const FAILS_TO_REBOOT = 3 # consecutive failed checks required to trigger reboot +const COOLDOWN_AFTER_REBOOT_SECS = 600 # do not reboot again within this many seconds +const DRY_RUN = false # set false to actually reboot +const CHECK_INTERVAL_SECS = 60 # run a check every CHECK_INTERVAL_SECS seconds + +const thisFolderPath = @__DIR__ +const LogFilePath = "$thisFolderPath/check_website_reboot_log.txt" # write logs here and also broadcast +const StateFilePath = "$thisFolderPath/check_and_reboot_state.json" + +# Simple broadcast helper +# Simple broadcast helper (safe Cmd construction) +function broadcast_msg(msg::AbstractString) + try + if Sys.islinux() + # Try wall if available by writing to its stdin + wall_paths = ("/usr/bin/wall", "/bin/wall") + for p in wall_paths + if isfile(p) + try + proc = open(`$p`, "w") + try + 
write(proc, msg * "\n") + finally + close(proc) + end + return true + catch + # ignore and try next + end + end + end + # Fallback to logger (safe arg passing) + try + run(Cmd(["logger", msg])) + return true + catch + end + elseif Sys.isapple() + # Use AppleScript notification as a fallback (escape double quotes) + try + escaped = replace(msg, "\"" => "\\\"") + applescript = "display notification \"" * escaped * "\" with title \"check_and_reboot\"" + run(Cmd(["osascript", "-e", applescript])) + return true + catch + end + elseif Sys.iswindows() + # Try msg to all sessions (may require privileges); best-effort + try + run(Cmd(["msg", "*", msg])) + return true + catch + end + end + catch + # swallow any unexpected errors + end + return false +end + + + +# Simple logging (prints, appends to LogFilePath, and broadcasts) +function logmsg(s::AbstractString) + t = Dates.now() + out = "[$t] $s" + # write to LogFilePath (append) + try + open(LogFilePath, "a") do io + println(io, out) + end + catch e + # If LogFilePath write fails, fallback to stdout + println("[$t] (log write failed: $e) $s") + end + # Also print to stdout for immediate console visibility + println(out) + # Best-effort system broadcast so operators on console see it + try + broadcast_msg(out) + catch + # ignore broadcast failures + end +end + + +# State handling +mutable struct State + consecutive_fails::Int + last_reboot_datetime::Union{DateTime, Nothing} +end + +function load_state(StateFilePath) + try + if isfile(StateFilePath) + jsonObj = JSON.parsefile(StateFilePath) + cf = haskey(jsonObj, "consecutive_fails") ? Int(jsonObj["consecutive_fails"]) : 0 + lr = haskey(jsonObj, "last_reboot_datetime") ? 
# Perform a single HTTP GET against `url`.
#
# Arguments
#   url     - address to probe.
#   timeout - seconds allowed both for establishing the connection and for
#             waiting on response data (defaults to TIMEOUT_SECS).
#
# Returns a tuple `(ok, status)`:
#   ok     - `true` when the final response status is in 200..399.
#   status - the HTTP status code, or `nothing` when no response was obtained
#            (DNS failure, connection refused, timeout, TLS error, ...).
#
# Never throws; any request error is reported as `(false, nothing)`.
function check_url_once(url::AbstractString; timeout=TIMEOUT_SECS)
    try
        resp = HTTP.request("GET", url;
                            connect_timeout = timeout,
                            # HTTP.jl spells this keyword `readtimeout`; `read_timeout`
                            # is not a recognised option, so no read timeout was applied.
                            readtimeout = timeout,
                            # Return 4xx/5xx responses instead of throwing, so the
                            # caller can log the real status code rather than
                            # "no response".
                            status_exception = false)
        return 200 <= resp.status < 400, resp.status
    catch
        return false, nothing
    end
end
# Trim a text file in place so that it holds at most `maxlines` lines,
# keeping the most recent (last) ones.
#
# Arguments
#   LogFilePath - path of the file to trim.
#   maxlines    - maximum number of lines to keep (values below 0 are treated as 0).
#
# Returns `nothing` when the file does not exist or is already short enough
# (the file is left untouched); otherwise returns the number of bytes written.
#
# Each kept line is written back terminated by "\n".
function limitTextFileLines(LogFilePath::String; maxlines::Integer=100)
    # On first start there is no log file yet; nothing to trim.
    isfile(LogFilePath) || return nothing

    keep = max(Int(maxlines), 0)
    lines = readlines(LogFilePath)
    # Already within the limit: avoid rewriting (and avoid indexing before 1).
    length(lines) <= keep && return nothing

    tail = lines[end-keep+1:end]   # an empty slice when keep == 0
    return write(LogFilePath, join(tail .* "\n"))
end
# Best-effort system-wide notification.
#
# Tries the platform's broadcast mechanism (`wall` then `logger` on Linux,
# an AppleScript notification on macOS, `msg *` on Windows) and returns
# `true` once one of them runs without throwing, `false` if none did.
# All errors are intentionally suppressed; this function never throws.
function broadcast_msg(msg::AbstractString)
    # Run `action`; report true when it completes, false when it throws.
    function succeeded(action)
        try
            action()
            return true
        catch
            return false
        end
    end

    try
        if Sys.islinux()
            for wall_bin in ("/usr/bin/wall", "/bin/wall")
                isfile(wall_bin) || continue
                delivered = succeeded() do
                    # Write the message to wall's stdin (no shell involved).
                    wall = open(Cmd([wall_bin]), "w")
                    try
                        write(wall, msg * "\n")
                    finally
                        close(wall)
                    end
                end
                delivered && return true
            end
            # No usable wall binary: fall back to the system logger.
            succeeded(() -> run(Cmd(["logger", msg]))) && return true
        elseif Sys.isapple()
            delivered = succeeded() do
                # Double quotes must be escaped inside the AppleScript literal.
                quoted = replace(msg, "\"" => "\\\"")
                script = "display notification \"" * quoted * "\" with title \"check_and_reboot\""
                run(Cmd(["osascript", "-e", script]))
            end
            delivered && return true
        elseif Sys.iswindows()
            # May require privileges; best-effort only.
            succeeded(() -> run(Cmd(["msg", "*", msg]))) && return true
        end
    catch
        # Swallow anything unexpected.
    end
    return false
end
# Select the reboot command for an operating system.
#
# Keyword arguments
#   kernel - OS kernel name as a Symbol (defaults to `Sys.KERNEL`, i.e. the
#            machine this script is running on). Exposed so each branch can be
#            exercised without running on that platform.
#
# Returns a tuple of strings (program followed by its arguments) suitable for
# `Cmd(collect(...))`, or `nothing` when the OS is not supported.
#
# On Linux, `systemctl reboot` is chosen when a systemctl binary exists on
# this machine; otherwise plain `reboot` is used.
function reboot_command(; kernel::Symbol=Sys.KERNEL)
    if Sys.iswindows(kernel)
        # Invoke shutdown directly: "/usr/bin/cmd" is not a valid Windows path,
        # and going through a shell is unnecessary.
        return ("shutdown", "/r", "/t", "0")
    elseif Sys.isapple(kernel)
        return ("/usr/bin/sudo", "shutdown", "-r", "now")
    elseif Sys.islinux(kernel)
        if isfile("/bin/systemctl") || isfile("/usr/bin/systemctl")
            return ("/usr/bin/sudo", "systemctl", "reboot")
        else
            return ("/usr/bin/sudo", "reboot")
        end
    else
        return nothing
    end
end
# Entry point: log the startup configuration and the persisted state, then
# run `perform_check!` forever, sleeping CHECK_INTERVAL_SECS between rounds.
#
# Any exception raised by a single check is logged and the loop continues,
# so one bad iteration cannot stop the watchdog. This function never returns.
function main_loop()
    logmsg("Starting check loop. Checking every $(CHECK_INTERVAL_SECS) seconds.")
    logmsg("STATE_FILE path: $(abspath(STATE_FILE))")
    st = load_state()

    # Describe the loaded state for visibility.
    if st.last_reboot == NEVER_REBOOTED
        age = "never"
        lr_str = "never"
    else
        # DateTime differences are in milliseconds; report whole seconds.
        age = string(div(Dates.value(now() - st.last_reboot), 1000), "s")
        lr_str = Dates.format(st.last_reboot, Dates.ISODateTime)
    end
    # Julia interpolation is `$(expr)`; the former `${age}` did not parse.
    logmsg("Loaded state: consecutive_fails=$(st.consecutive_fails) last_reboot=$(lr_str) (age=$(age))")

    while true
        try
            perform_check!(st)
        catch e
            logmsg("Error during check: $e")
        end
        sleep(CHECK_INTERVAL_SECS)
    end
end