diff --git a/providers/monitor.rb b/providers/monitor.rb index feb81e2c..a547985e 100644 --- a/providers/monitor.rb +++ b/providers/monitor.rb @@ -33,6 +33,10 @@ def whyrun_supported? service 'datadog-agent' do service_name node['datadog']['agent_name'] + # HACK: the restart can fail when we hit systemd's restart limits (by default, 5 starts every 10 seconds) + # To work around this, retry up to twice, waiting 5 seconds before each retry (i.e. about 5 and 10 seconds after the first failure) + retries 2 + retry_delay 5 end end diff --git a/recipes/dd-agent.rb b/recipes/dd-agent.rb index d487527a..8d5877c3 100644 --- a/recipes/dd-agent.rb +++ b/recipes/dd-agent.rb @@ -107,6 +107,10 @@ def template_vars # rubocop:disable Metrics/AbcSize supports :restart => true, :status => true, :start => true, :stop => true end subscribes :restart, "template[#{agent_config_file}]", :delayed unless node['datadog']['agent_start'] == false + # HACK: the restart can fail when we hit systemd's restart limits (by default, 5 starts every 10 seconds) + # To work around this, retry up to twice, waiting 5 seconds before each retry (i.e. about 5 and 10 seconds after the first failure) + retries 2 + retry_delay 5 end # Install integration packages