Skip to content

Commit

Permalink
Merge pull request #13 from cypherglassdotcom/new-features
Browse files Browse the repository at this point in the history
Recovery alerts
  • Loading branch information
leordev authored Jun 5, 2018
2 parents bc16a3d + f98c5e3 commit 6cc4e8e
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 32 deletions.
6 changes: 6 additions & 0 deletions backend/lib/windshield/alerts.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,21 @@ defmodule Windshield.Alerts do
import WindshieldWeb, only: [main_address: 0]

# Alert type identifiers. Each failure alert has a matching recovery alert whose
# identifier starts with "RESTORED_"; the frontend checks that prefix to render
# the notification as a success instead of an error, so keep the prefix when
# adding new recovery types.
@unanswered_ping "UNANSWERED_PING"
@restored_ping "RESTORED_PING"
@bp_not_producing "BP_NOT_PRODUCING"
@restored_production "RESTORED_PRODUCTION"
@unsynched_blocks "UNSYNCHED_BLOCKS"
@voting_position "VOTING_POSITION"
@restored_voting_position "RESTORED_VOTING_POSITION"
@nodes_full_fork_report "NODES_FULL_FORK_REPORT"

# Public accessors, so other modules can reference the alert type strings
# without duplicating the literals.
def unanswered_ping, do: @unanswered_ping
def restored_ping, do: @restored_ping
def bp_not_producing, do: @bp_not_producing
def restored_production, do: @restored_production
def unsynched_blocks, do: @unsynched_blocks
def voting_position, do: @voting_position
def restored_voting_position, do: @restored_voting_position
def nodes_full_fork_report, do: @nodes_full_fork_report

def alert_mail(type, description) do
Expand Down
99 changes: 69 additions & 30 deletions backend/lib/windshield/node.ex
Original file line number Diff line number Diff line change
Expand Up @@ -166,30 +166,23 @@ defmodule Windshield.Node do
bp_paused = check_bp_pause(state)

# apply UTC timezone
last_produced_block_at = state.last_produced_block_at <> "Z"

last_production_datetime =
case DateTime.from_iso8601(last_produced_block_at) do
{:ok, datetime, 0} ->
DateTime.to_unix(datetime) * 1_000_000_000

{:error, _} ->
0
end
{last_produced_block_at, last_production_datetime} =
calc_production_time(state.last_produced_block_at)

same_alert_interval = state.settings["same_alert_interval_mins"] * 60_000_000_000

last_production_diff_secs = (System.os_time() - last_production_datetime) / 1_000_000_000

last_bpcheck_alert_at =
with last_production_diff <- System.os_time() - last_production_datetime,
false <- bp_paused, # do not alert if block production is paused
with false <- bp_paused, # do not alert if block production is paused
true <- state.vote_position <= 21, # should alert only if bp is under top 21
true <-
last_production_diff / 1_000_000_000 > state.settings["bp_tolerance_time_secs"],
last_production_diff_secs > state.settings["bp_tolerance_time_secs"],
last_bpcheck_alert_interval <- System.os_time() - state.last_bpcheck_alert_at,
true <- last_bpcheck_alert_interval > same_alert_interval do
error = """
Block Producer Node #{state.name} is not producing for a while.
Last Block production registered at #{state.last_produced_block_at} UTC.
Last Block production registered at #{last_produced_block_at} UTC.
"""

Database.insert_alert(Alerts.bp_not_producing(), error, nil)
Expand Down Expand Up @@ -221,6 +214,7 @@ defmodule Windshield.Node do

{:error, last_ping_alert}
else
broadcast_restored_ping(state)
{:active, state.last_ping_alert_at}
end

Expand Down Expand Up @@ -253,20 +247,33 @@ defmodule Windshield.Node do
{:noreply, new_state}
end

def handle_cast({:update_block, block_info}, state) do
if block_info["producer"] == state.account do
last_produced_block = block_info["block_num"]
last_produced_block_at = block_info["timestamp"]

{:noreply,
%{
state
| last_produced_block: last_produced_block,
last_produced_block_at: last_produced_block_at
}}
else
{:noreply, state}
# Records the newest block produced by this node and, when production resumes
# after a "not producing" alert, registers a RESTORED_PRODUCTION alert.
#
# `block_info` is a map with string keys; "block_num" and "timestamp" are read.
# The state is always updated with the new block number and timestamp.
def handle_cast({:update_produced_block, block_info}, state) do
  new_produced_block = block_info["block_num"]
  new_produced_block_at = block_info["timestamp"]

  {last_produced_block_at, last_production_datetime} =
    calc_production_time(state.last_produced_block_at)

  {new_produced_block_at_txt, new_production_datetime} =
    calc_production_time(new_produced_block_at)

  # Production is considered restored when the previously known block predates
  # the last "not producing" alert and the new block is more recent than it.
  # Once the state holds the new timestamp the first condition turns false, so
  # the alert is not repeated for subsequent blocks.
  if last_production_datetime < state.last_bpcheck_alert_at &&
       new_production_datetime > state.last_bpcheck_alert_at do
    msg = """
    Block Producer Node #{state.name} came back at full steam production!
    New Block production registered at #{new_produced_block_at_txt} UTC,
    and the last one before that was #{last_produced_block_at}.
    """

    # explicit nil third argument, matching the other insert_alert call sites
    Database.insert_alert(Alerts.restored_production(), msg, nil)
  end

  {:noreply,
   %{
     state
     | last_produced_block: new_produced_block,
       last_produced_block_at: new_produced_block_at
   }}
end

def handle_cast({:update_votes, votes_count, vote_percentage, bp_vote_position}, state) do
Expand All @@ -277,14 +284,22 @@ defmodule Windshield.Node do
vote_position: bp_vote_position
}

if new_state.vote_position != state.vote_position && state.type == "BP" &&
state.vote_position > 0 && state.is_watchable do
if new_state.vote_position != state.vote_position &&
state.vote_position > 0 && state.vote_position != 9999 &&
state.is_watchable do
msg = """
The BP #{state.account} has changed the voting position rank
from #{state.vote_position} to #{new_state.vote_position}.
"""

Database.insert_alert(Alerts.voting_position(), msg, nil)
alert_type =
if new_state.vote_position < state.vote_position do
Alerts.restored_voting_position()
else
Alerts.voting_position()
end

Database.insert_alert(alert_type, msg, nil)
end

{:noreply, new_state}
Expand Down Expand Up @@ -329,6 +344,22 @@ defmodule Windshield.Node do
}
end

@doc """
Normalizes a block production timestamp and converts it to an integer time.

`last_prod_time` is an ISO 8601 timestamp string, treated as UTC. A trailing
"Z" is appended when it is not already present. `nil` (no block seen yet) is
accepted and treated as "never produced".

Returns `{timestamp_text, unix_time}` where `timestamp_text` is the normalized
string and `unix_time` is the Unix time in whole seconds multiplied by
1_000_000_000 (sub-second precision is dropped), or `0` when the timestamp
cannot be parsed.
"""
def calc_production_time(nil), do: {"", 0}

def calc_production_time(last_prod_time) do
  # apply UTC timezone, unless the timestamp already carries the "Z" designator
  # (appending a second "Z" would make parsing fail and silently yield 0)
  last_produced_block_at =
    if String.ends_with?(last_prod_time, "Z") do
      last_prod_time
    else
      last_prod_time <> "Z"
    end

  # NOTE(review): callers compare this value against System.os_time/0, which is
  # expressed in the VM's native time unit - confirm that unit is nanoseconds
  # on the deployment platform.
  last_production_datetime =
    case DateTime.from_iso8601(last_produced_block_at) do
      {:ok, datetime, _offset} ->
        DateTime.to_unix(datetime) * 1_000_000_000

      {:error, _} ->
        0
    end

  {last_produced_block_at, last_production_datetime}
end

def ping_info(state) do
start = System.os_time()

Expand Down Expand Up @@ -370,6 +401,14 @@ defmodule Windshield.Node do
end
end

@doc """
Registers a RESTORED_PING alert when a node that was in error status answers
a ping again.

Reads `state.status` and `state.account`. Returns the result of
`Database.insert_alert/3` when an alert is inserted, otherwise `nil`.
"""
def broadcast_restored_ping(state) do
  # only send alert if it had an error status before
  if state.status == :error do
    msg = "#{state.account} answered a successful ping and it's now restored!"
    # explicit nil third argument, matching the other insert_alert call sites
    Database.insert_alert(Alerts.restored_ping(), msg, nil)
  end
end

def get_state(account) do
try do
[{"state", state}] = :ets.lookup(account, "state")
Expand Down
2 changes: 1 addition & 1 deletion backend/lib/windshield/principal_monitor.ex
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ defmodule Windshield.PrincipalMonitor do
do_process_block(state.stats, state.producers, processing_block, info)

# update respective node
GenServer.cast(String.to_atom(info["producer"]), {:update_block, info})
GenServer.cast(String.to_atom(info["producer"]), {:update_produced_block, info})

# update state
new_state = %{state | stats: new_stats, producers: updated_producers}
Expand Down
8 changes: 7 additions & 1 deletion frontend/src/Update.elm
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,14 @@ update msg model =
alerts =
alert :: model.alerts

alertType =
if String.startsWith "RESTORED_" alert.alertType then
(Success alert.description)
else
(Error alert.description)

notifications =
Notification (Error alert.description) model.currentTime (toString alert.createdAt)
Notification alertType model.currentTime (toString alert.createdAt)
:: model.notifications
in
( { model | alerts = alerts, notifications = notifications }
Expand Down

0 comments on commit 6cc4e8e

Please sign in to comment.