Auto-restart when unhealthy

2023-02-07 22:08:34 +05:30 · 2023-02-07 22:08:34 +05:30 · 6c800d8fc1
commit 6c800d8fc1
parent b880a1d03a
2 changed files with 26 additions and 26 deletions
--- a/healthcheck.js
+++ b/healthcheck.js
@@ -1,7 +1,12 @@
+const fs = require('fs');
 const fetch = require('node-fetch');

 const URL = 'http://localhost:3001';

+const STATUS_FILE = 'healthcheck.status',
+    READY_FILE = 'healthcheck.ready',
+    LOG_FILE = 'healthcheck.log';
+
 const checks = [];

 //Check if UI is working and is type HTML
@@ -72,15 +77,18 @@ checks.push(function check_lastBlock() {
 Promise.all(checks.map(c => c())).then(results => {
    let reasons = results.filter(r => r !== true);
    if (!reasons.length) {
-        console.debug("HEALTHY");
+        fs.writeFileSync(STATUS_FILE, "HEALTHY");
+        fs.writeFileSync(READY_FILE, "1");  //Indicate the node has reached healthy status at least once
        process.exit(0);
    } else {
-        console.debug("UNHEALTHY");
-        console.debug(reasons);
+        fs.writeFileSync(STATUS_FILE, "UNHEALTHY");
+        let reason_log = `${new Date().toJSON()}:FAIL: ${JSON.stringify(reasons)}\n`;
+        fs.writeFileSync(LOG_FILE, reason_log, { flag: 'a' });
        process.exit(1);
    }
 }).catch(err => {
-    console.debug("ERROR");
-    console.error(err);
+    fs.writeFileSync(STATUS_FILE, "ERROR");
+    let err_log = `${new Date().toJSON()}:ERROR: ${err}\n`;
+    fs.writeFileSync(LOG_FILE, err_log, { flag: 'a' });
    process.exit(1);
 })
--- a/start.sh
+++ b/start.sh
@@ -66,31 +66,23 @@ echo "Nginx Started."
 echo "Starting FLO Explorer $NETWORK"
 ./node_modules/flocore-node/bin/flocore-node start > /data/latest.log &
 # Store PID for later
-echo $! > /data/flosight.pid
-
-# Allow to startup
-timeout 1m tail -n 100 -f /data/latest.log
-
-# Initialize block sync check file
-curl --silent http://localhost:3001/api/status?q=getBestBlockHash > currentHealthCheck.log
-echo 'different' > previousHealthCheck.log
+echo $! > flosight.pid

 # Every 5 minutes
 while true; do
-	# Check to see if the most recent block hash is the same as the last time we checked.
-	#if [ "$(cat previousHealthCheck.log)" == "$(cat currentHealthCheck.log)" ] 
-	#then
-	#	# Restart instance
-	#	echo "NO NEW BLOCKS IN 5+ MINUTES - RESTARTING PROCESS"
-	#	kill -2 $(cat /data/flosight.pid)
-	#	wait $(cat /data/flosight.pid)
-	#	./node_modules/flocore-node/bin/flocore-node start >> /data/latest.log &
-	#	# Store PID for later
-	#	echo $! > /data/flosight.pid
-	#fi
+	
 	# Wait 5 minutes before checking again
 	timeout 5m tail -f /data/latest.log

-	mv currentHealthCheck.log previousHealthCheck.log
-	curl --silent http://localhost:3001/api/status?q=getBestBlockHash > currentHealthCheck.log
+	# Check the health of the node and restart if needed
+	if [ "$(cat healthcheck.status)" == "UNHEALTHY" ] && [ "$(cat healthcheck.ready)" == "1" ]; then
+		# Restart instance
+		echo "$(date): UNHEALTHY - RESTARTING PROCESS" >> /data/latest.log
+		kill -2 $(cat flosight.pid)
+		wait $(cat flosight.pid)
+		./node_modules/flocore-node/bin/flocore-node start >> /data/latest.log &
+		# Store PID for later
+		echo $! > flosight.pid
+	fi
+
 done;