|
| 1 | +/* |
| 2 | + * Parseable Server (C) 2022 - 2024 Parseable, Inc. |
| 3 | + * |
| 4 | + * This program is free software: you can redistribute it and/or modify |
| 5 | + * it under the terms of the GNU Affero General Public License as |
| 6 | + * published by the Free Software Foundation, either version 3 of the |
| 7 | + * License, or (at your option) any later version. |
| 8 | + * |
| 9 | + * This program is distributed in the hope that it will be useful, |
| 10 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 11 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 12 | + * GNU Affero General Public License for more details. |
| 13 | + * |
| 14 | + * You should have received a copy of the GNU Affero General Public License |
| 15 | + * along with this program. If not, see <http://www.gnu.org/licenses/>. |
| 16 | + * |
| 17 | + */ |
| 18 | + |
| 19 | +use std::sync::{atomic::AtomicBool, Arc, LazyLock}; |
| 20 | + |
| 21 | +use actix_web::{ |
| 22 | + body::MessageBody, |
| 23 | + dev::{ServiceRequest, ServiceResponse}, |
| 24 | + error::Error, |
| 25 | + error::ErrorServiceUnavailable, |
| 26 | + middleware::Next, |
| 27 | +}; |
| 28 | +use tokio::{select, time::{interval, Duration}}; |
| 29 | +use tracing::{warn, trace, info}; |
| 30 | + |
| 31 | +use crate::analytics::{SYS_INFO, refresh_sys_info}; |
| 32 | +use crate::parseable::PARSEABLE; |
| 33 | + |
| 34 | +static RESOURCE_CHECK_ENABLED:LazyLock<Arc<AtomicBool>> = LazyLock::new(|| Arc::new(AtomicBool::new(false))); |
| 35 | + |
| 36 | +/// Spawn a background task to monitor system resources |
| 37 | +pub fn spawn_resource_monitor(shutdown_rx: tokio::sync::oneshot::Receiver<()>) { |
| 38 | + tokio::spawn(async move { |
| 39 | + let resource_check_interval = PARSEABLE.options.resource_check_interval; |
| 40 | + let mut check_interval = interval(Duration::from_secs(resource_check_interval)); |
| 41 | + let mut shutdown_rx = shutdown_rx; |
| 42 | + |
| 43 | + let cpu_threshold = PARSEABLE.options.cpu_utilization_threshold; |
| 44 | + let memory_threshold = PARSEABLE.options.memory_utilization_threshold; |
| 45 | + |
| 46 | + info!("Resource monitor started with thresholds - CPU: {:.1}%, Memory: {:.1}%", |
| 47 | + cpu_threshold, memory_threshold); |
| 48 | + loop { |
| 49 | + select! { |
| 50 | + _ = check_interval.tick() => { |
| 51 | + trace!("Checking system resource utilization..."); |
| 52 | + |
| 53 | + refresh_sys_info(); |
| 54 | + let (used_memory, total_memory, cpu_usage) = tokio::task::spawn_blocking(|| { |
| 55 | + let sys = SYS_INFO.lock().unwrap(); |
| 56 | + let used_memory = sys.used_memory() as f32; |
| 57 | + let total_memory = sys.total_memory() as f32; |
| 58 | + let cpu_usage = sys.global_cpu_usage(); |
| 59 | + (used_memory, total_memory, cpu_usage) |
| 60 | + }).await.unwrap(); |
| 61 | + |
| 62 | + let mut resource_ok = true; |
| 63 | + |
| 64 | + // Calculate memory usage percentage |
| 65 | + let memory_usage = if total_memory > 0.0 { |
| 66 | + (used_memory / total_memory) * 100.0 |
| 67 | + } else { |
| 68 | + 0.0 |
| 69 | + }; |
| 70 | + |
| 71 | + // Log current resource usage every few checks for debugging |
| 72 | + info!("Current resource usage - CPU: {:.1}%, Memory: {:.1}% ({:.1}GB/{:.1}GB)", |
| 73 | + cpu_usage, memory_usage, |
| 74 | + used_memory / 1024.0 / 1024.0 / 1024.0, |
| 75 | + total_memory / 1024.0 / 1024.0 / 1024.0); |
| 76 | + |
| 77 | + // Check memory utilization |
| 78 | + if memory_usage > memory_threshold { |
| 79 | + warn!("High memory usage detected: {:.1}% (threshold: {:.1}%)", |
| 80 | + memory_usage, memory_threshold); |
| 81 | + resource_ok = false; |
| 82 | + } |
| 83 | + |
| 84 | + // Check CPU utilization |
| 85 | + if cpu_usage > cpu_threshold { |
| 86 | + warn!("High CPU usage detected: {:.1}% (threshold: {:.1}%)", |
| 87 | + cpu_usage, cpu_threshold); |
| 88 | + resource_ok = false; |
| 89 | + } |
| 90 | + |
| 91 | + let previous_state = RESOURCE_CHECK_ENABLED.load(std::sync::atomic::Ordering::SeqCst); |
| 92 | + RESOURCE_CHECK_ENABLED.store(resource_ok, std::sync::atomic::Ordering::SeqCst); |
| 93 | + |
| 94 | + // Log state changes |
| 95 | + if previous_state != resource_ok { |
| 96 | + if resource_ok { |
| 97 | + info!("Resource utilization back to normal - requests will be accepted"); |
| 98 | + } else { |
| 99 | + warn!("Resource utilization too high - requests will be rejected"); |
| 100 | + } |
| 101 | + } |
| 102 | + }, |
| 103 | + _ = &mut shutdown_rx => { |
| 104 | + trace!("Resource monitor shutting down"); |
| 105 | + break; |
| 106 | + } |
| 107 | + } |
| 108 | + } |
| 109 | + }); |
| 110 | +} |
| 111 | + |
| 112 | +/// Middleware to check system resource utilization before processing requests |
| 113 | +/// Returns 503 Service Unavailable if resources are over-utilized |
| 114 | +pub async fn check_resource_utilization_middleware( |
| 115 | + req: ServiceRequest, |
| 116 | + next: Next<impl MessageBody>, |
| 117 | +) -> Result<ServiceResponse<impl MessageBody>, Error> { |
| 118 | + |
| 119 | + let resource_ok = RESOURCE_CHECK_ENABLED.load(std::sync::atomic::Ordering::SeqCst); |
| 120 | + |
| 121 | + if !resource_ok { |
| 122 | + let error_msg = "Server resources over-utilized"; |
| 123 | + warn!("Rejecting request to {} due to resource constraints", req.path()); |
| 124 | + return Err(ErrorServiceUnavailable(error_msg)); |
| 125 | + } |
| 126 | + |
| 127 | + // Continue processing the request if resource utilization is within limits |
| 128 | + next.call(req).await |
| 129 | +} |
0 commit comments