Skip to content

Commit

Permalink
Add no-ops and flaky health check tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kaylareopelle committed Jan 3, 2025
1 parent 99e216b commit 6850ff7
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 17 deletions.
31 changes: 18 additions & 13 deletions lib/new_relic/agent/health_check.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,27 +32,23 @@ def initialize
# should only be reported if agent is "healthy" on shutdown
SHUTDOWN = {healthy: true, last_error: 'NR-APM-099', message: 'Agent has shutdown'}

# make a no-op
# should we have no-op methods so that we can call the health check methods
# in the agent code even if the health check isn't active?

def update_status(status, options = [])
@status = status
update_message(options) unless options.empty?
end

def update_message(options)
@status[:message] = sprintf(@status[:message], **options)
rescue StandardError => e
NewRelic::Agent.logger.debug("Error raised while updating agent control health check message: #{e}." \
"Reverting to original message.\noptions = #{options}, @status[:message] = #{@status[:message]}")
end

def create_and_run_health_check_loop
unless @fleet_id && @delivery_location && (@frequency > 0)
require_relative 'no_op_health_check'
NewRelic::Agent::HealthCheck.prepend(NewRelic::Agent::NoOpHealthCheck)
@continue = false
end

return NewRelic::Agent.logger.debug('agent_control.fleet_id not found, skipping health checks') unless @fleet_id
return NewRelic::Agent.logger.debug('agent_control.health.file_destination not found, skipping health checks') unless @delivery_location
return NewRelic::Agent.logger.debug('agent_control.health.delivery_location not found, skipping health checks') unless @delivery_location
return NewRelic::Agent.logger.debug('agent_control.health.frequency zero or less, skipping health checks') unless @frequency > 0

NewRelic::Agent.logger.debug('Agent control health check conditions met. Starting health checks.')
NewRelic::Agent.record_metric('Supportability/AgentControl/Health/enabled', 1)

Thread.new do
Expand All @@ -78,7 +74,7 @@ def status
end

def last_error
"last_error: #{@status[:last_error]}" unless @status[:healthy]
@status[:healthy] ? '' : "last_error: #{@status[:last_error]}"
end

def start_time
Expand Down Expand Up @@ -125,6 +121,15 @@ def create_file_path
)
@continue = false
end

private

def update_message(options)
@status[:message] = sprintf(@status[:message], **options)
rescue StandardError => e
NewRelic::Agent.logger.debug("Error raised while updating agent control health check message: #{e}." \
"Reverting to original message.\noptions = #{options}, @status[:message] = #{@status[:message]}")
end
end
end
end
10 changes: 10 additions & 0 deletions lib/new_relic/agent/no_op_health_check.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
module NewRelic
module Agent
module NoOpHealthCheck
# no-op methods prepended onto the HealthCheck class when health checks
# are disabled
def update_status(status, options = [])
end
end
end
end
7 changes: 7 additions & 0 deletions test/agent_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,13 @@ def assert_log_contains(log, message)
"Could not find message: '#{message.inspect}'. Log contained: #{lines.join("\n")}"
end

def refute_log_contains(log, message)
lines = log.array

refute (lines.any? { |line| line.match(message) }),
"Found message: '#{message.inspect}'. Log contained: #{lines.join("\n")}"
end

def assert_audit_log_contains(audit_log_contents, needle)
# Original request bodies dumped to the log have symbol keys, but once
# they go through a dump/load, they're strings again, so we strip
Expand Down
14 changes: 14 additions & 0 deletions test/new_relic/agent/agent/connect_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,20 @@ def test_environment_for_connect_negative
end
end

def test_health_check_status_updated_when_connect_fails
with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'landslide',
'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health',
'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5'
) do
NewRelic::Agent.agent.stub(:should_connect?, -> (opts) { raise NewRelic::Agent::ServerConnectionException.new }) do
# assert_equal NewRelic::Agent::HealthCheck::HEALTHY, NewRelic::Agent.agent.health_check.instance_variable_get(:@status)
# NewRelic::Agent.agent.connect

assert_equal NewRelic::Agent::HealthCheck::FAILED_TO_CONNECT, NewRelic::Agent.agent.health_check.instance_variable_get(:@status)
end
end
end

private

def mocked_control
Expand Down
107 changes: 103 additions & 4 deletions test/new_relic/agent/health_check_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,6 @@ class NewRelicHealthCheckTest < Minitest::Test

def teardown
mocha_teardown
# TODO: maybe delete the file every time?
# FileUtils.rm_rf('health')
end

def test_yaml_health_file_written_to_delivery_location
Expand Down Expand Up @@ -148,7 +146,7 @@ def test_yaml_file_has_last_error_field_when_status_not_healthy
health_check = NewRelic::Agent::HealthCheck.new
health_check.update_status(NewRelic::Agent::HealthCheck::INVALID_LICENSE_KEY)
health_check.write_file

binding.irb
assert_predicate File.readlines('health/health-abc123.yml').grep(/last_error:/), :any?
end
end
Expand Down Expand Up @@ -222,5 +220,106 @@ def test_yaml_file_has_new_status_time_each_file
ensure
FileUtils.rm_rf('health')
end
## ADD MORE TESTS FOR ERROR CODE BEHAVIOR

def test_agent_health_started_if_required_info_present
with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'landslide',
'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health',
'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5'
) do
log = with_array_logger(:debug) do
health_check = NewRelic::Agent::HealthCheck.new
health_check.create_and_run_health_check_loop
end

assert_log_contains(log, 'Agent control health check conditions met. Starting health checks.')
refute_log_contains(log, 'agent_control.fleet_id not found')
refute_log_contains(log, 'agent_control.health.delivery_location not found')
refute_log_contains(log, 'agent_control.health.frequency zero or less')
end
end

def test_agent_health_not_generated_if_agent_control_fleet_id_absent
with_environment('NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health',
'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5'
) do
log = with_array_logger(:debug) do
health_check = NewRelic::Agent::HealthCheck.new
# loop should exit before write_file is called
# raise an error if it's invoked
health_check.stub(:write_file, -> { raise 'kaboom!'} ) do
health_check.create_and_run_health_check_loop
end
end

assert_log_contains(log, 'agent_control.fleet_id not found')
refute_log_contains(log, 'Agent control health check conditions met. Starting health checks.')
end
end

def test_agent_health_not_generated_if_delivery_location_absent
with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'mykonos',
'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5'
) do
log = with_array_logger(:debug) do
health_check = NewRelic::Agent::HealthCheck.new
# loop should exit before write_file is called
# raise an error if it's invoked
health_check.stub(:write_file, -> { raise 'kaboom!'} ) do
health_check.create_and_run_health_check_loop
end
end

assert_log_contains(log, 'agent_control.health.delivery_location not found')
refute_log_contains(log, 'Agent control health check conditions met. Starting health checks.')
end
end

def test_agent_health_not_generated_if_frequency_is_zero
with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'anchors-away',
'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health',
'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '0'
) do
log = with_array_logger(:debug) do
health_check = NewRelic::Agent::HealthCheck.new
# loop should exit before write_file is called
# raise an error if it's invoked
health_check.stub(:write_file, -> { raise 'kaboom!'} ) do
health_check.create_and_run_health_check_loop
end
end

assert_log_contains(log, 'agent_control.health.frequency zero or less')
refute_log_contains(log, 'Agent control health check conditions met. Starting health checks.')
end
end

def test_agent_health_supportability_metric_generated_recorded_when_health_check_loop_starts
NewRelic::Agent.instance.stats_engine.clear_stats

with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => 'landslide',
'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => '/health',
'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '5'
) do
health_check = NewRelic::Agent::HealthCheck.new
health_check.create_and_run_health_check_loop

assert_metrics_recorded({'Supportability/AgentControl/Health/enabled' => { call_count: 1 }})
end
end

def test_update_status_is_a_no_op_when_health_checks_disabled
with_environment('NEW_RELIC_AGENT_CONTROL_FLEET_ID' => nil,
'NEW_RELIC_AGENT_CONTROL_HEALTH_DELIVERY_LOCATION' => nil,
'NEW_RELIC_AGENT_CONTROL_HEALTH_FREQUENCY' => '0'
) do
health_check = NewRelic::Agent::HealthCheck.new

assert_equal NewRelic::Agent::HealthCheck::HEALTHY, health_check.instance_variable_get(:@status)

health_check.create_and_run_health_check_loop
health_check.update_status(NewRelic::Agent::HealthCheck::SHUTDOWN)

assert_equal NewRelic::Agent::HealthCheck::HEALTHY, health_check.instance_variable_get(:@status)
end
end
end

0 comments on commit 6850ff7

Please sign in to comment.