diff --git a/Makefile b/Makefile index 018a938..9d22ea0 100644 --- a/Makefile +++ b/Makefile @@ -16,11 +16,14 @@ else endif # DOCKER TASKS -up: ## Runs the containers in detached mode +up: ## Runs the containers in detached mode with default config $(DOCKER_COMPOSE) $(PREFIX) up -d --build +up-remote: ## Runs the containers in detached mode with remote-write config + PROMETHEUS_CONFIG_FILENAME=prometheus-remotewrite LOKI_CONFIG_FILENAME=loki-remotewrite $(DOCKER_COMPOSE) $(PREFIX) -f docker-compose.yml -f docker-compose.remote.yml up -d + clean: ## Stops and removes all containers - $(DOCKER_COMPOSE) $(PREFIX) down + $(DOCKER_COMPOSE) $(PREFIX) -f docker-compose.yml -f docker-compose.remote.yml down logs: ## View the logs from the containers $(DOCKER_COMPOSE) $(PREFIX) logs -f diff --git a/README.md b/README.md index c5c404b..82c96db 100644 --- a/README.md +++ b/README.md @@ -18,9 +18,16 @@ Get your monitoring stack up and running with one command using a Docker Compose You can list the targets using `make`. -## Boot +## Deployment -Boot the stack with docker compose (or `make up`): +You can use one of the following deployment methods: + +- **default**: deploys the containers mentioned above. +- **remote-write**: adds a second prometheus and enables remote write with prometheus and loki. + +### Boot: Default + +Boot the stack with docker compose (or `make up`), which is the default deployment: ```bash docker-compose up -d @@ -46,6 +53,39 @@ loki /usr/bin/loki -conf ... Up 0.0.0.0:3100->31 promtail /usr/bin/promtail ... Up ``` +### Boot: Remote Write + +To deploy the remote write deployment, you can run: + +```bash +make up-remote +``` + +This will deploy the default stack, as well as the remote prometheus container and remote-write configuration, to do the following: +- **prometheus**: push metrics from `cluster: local` to `cluster: remote` using remote-write. +- **loki**: push recording rules metrics to `cluster: remote` prometheus. 
+ +Ensure all containers are running: + +```bash +docker-compose ps +``` + +The output should look like this: + +```bash + Name Command State Ports +-------------------------------------------------------------------------------------- +cadvisor /usr/bin/cadvisor -logtostderr Up (healthy) 8080/tcp +grafana /run.sh Up 0.0.0.0:3000->3000/tcp +node-exporter /bin/node_exporter --path. ... Up 9100/tcp +prometheus /bin/prometheus --config.f ... Up 0.0.0.0:9090->9090/tcp +prometheus-remote /bin/prometheus --config.f ... Up 0.0.0.0:9091->9090/tcp +alertmanager /bin/alertmanager --config ... Up 0.0.0.0:9093->9093/tcp +loki /usr/bin/loki -conf ... Up 0.0.0.0:3100->3100/tcp +promtail /usr/bin/promtail ... Up +``` + ## Access Grafana Access grafana on [Grafana Home](http://localhost:3000/?orgId=1) (or `make open`) and you should see the three dashboards that was provisioned: @@ -84,25 +124,44 @@ For discovering the **Logs** we can navigate to the Explore / Logs view: ![](./assets/grafana-explore-logs.png) +## Remote Write + +### Remote Write: Prometheus + +The prometheus cluster is configured to push its metrics to our `prometheus-remote` container, and we can access our remote prometheus on port 9091 using the following query: +- [`prometheus_build_info{cluster="local"}`](http://localhost:9091/graph?g0.expr=prometheus_build_info%7Bcluster%3D%22local%22%7D&g0.tab=1&g0.stacked=0&g0.show_exemplars=0&g0.range_input=1h) + +As you can see the external label `cluster: local` is being applied on our `prometheus` container.
+ +The config is accessible: `./configs/prometheus/prometheus-remotewrite.yml` + +### Remote Write: Loki + +The remote write configuration under the Loki Ruler has been configured to write the recording rules to our `prometheus-remote` container endpoint, and can be accessed using: +- [`instance:service_log_bytes:sum_rate1m`](http://localhost:9091/graph?g0.expr=instance%3Aservice_log_bytes%3Asum_rate1m&g0.tab=1&g0.stacked=0&g0.show_exemplars=0&g0.range_input=1h) + +The config is accessible: `./configs/loki/loki-remotewrite.yaml` + ## Endpoints The following endpoints are available: -| Container | Internal Endpoint | External Endpoint | -| -------------- | ------------------------- |---------------------- | -| Grafana | http://grafana:3000 | http://localhost:3000 | -| Prometheus | http://prometheus:9090 | http://localhost:9090 | -| Node-Exporter | http://node-exporter:9100 | http://localhost:9100 | -| cAdvisor | http://cadvisor:8080 | N/A | -| Alertmanager | http://alertmanager:9093 | http://localhost:9093 | -| Loki | http://loki:3100 | http://localhost:3100 | +| Container | Internal Endpoint | External Endpoint | Note | +| ----------------- | ----------------------------- |---------------------- | ---------------------------------- | +| Grafana | http://grafana:3000 | http://localhost:3000 | Enabled in default deployment | +| Prometheus | http://prometheus:9090 | http://localhost:9090 | Enabled in default deployment | +| Prometheus-Remote | http://prometheus-remote:9090 | http://localhost:9091 | Enabled in remote-write deployment | +| Node-Exporter | http://node-exporter:9100 | http://localhost:9100 | Enabled in default deployment | +| cAdvisor | http://cadvisor:8080 | N/A | Enabled in default deployment | +| Alertmanager | http://alertmanager:9093 | http://localhost:9093 | Enabled in default deployment | +| Loki | http://loki:3100 | http://localhost:3100 | Enabled in default deployment | ## Cleanup -To remove the containers using docker compose (or `make clean`):
+To remove the containers: ```bash -docker-compose down +make clean ``` ## Stargazers over time diff --git a/configs/loki/rules.yaml b/configs/loki/alerting-rules.yaml similarity index 92% rename from configs/loki/rules.yaml rename to configs/loki/alerting-rules.yaml index 0563575..b6cb8c1 100644 --- a/configs/loki/rules.yaml +++ b/configs/loki/alerting-rules.yaml @@ -1,5 +1,5 @@ groups: - - name: log-rules + - name: log-alerting-rules rules: - alert: HighLogErrorRate expr: rate({service_name="loki"} | logfmt | level="error"[1m]) > 1 diff --git a/configs/loki/loki-remotewrite.yaml b/configs/loki/loki-remotewrite.yaml new file mode 100644 index 0000000..1d1ea82 --- /dev/null +++ b/configs/loki/loki-remotewrite.yaml @@ -0,0 +1,94 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + log_level: info + +common: + instance_addr: 127.0.0.1 + path_prefix: /tmp/loki + storage: + filesystem: + chunks_directory: /tmp/loki/chunks + rules_directory: /tmp/loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +frontend: + max_outstanding_per_tenant: 2048 + +pattern_ingester: + enabled: true + +limits_config: + ingestion_rate_mb: 50000 + ingestion_rate_strategy: global + ingestion_burst_size_mb: 50000 + max_global_streams_per_user: 0 + max_label_name_length: 1024 + max_label_value_length: 2048 + max_label_names_per_series: 30 + max_streams_matchers_per_query: 1000 + max_concurrent_tail_requests: 20 + max_entries_limit_per_query: 5000 + volume_enabled: true + reject_old_samples: true + reject_old_samples_max_age: 168h + ruler_remote_write_config: + default_client: + url: "http://prometheus-remote:9090/api/v1/write" + send_exemplars: true + basic_auth: + username: "" + password: "" + queue_config: + max_samples_per_send: 1000 + batch_send_deadline: 5s + max_shards: 200 + min_shards: 1 + capacity: 2500 + +query_range: + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 100 + +schema_config: + configs: + - 
from: 2020-10-24 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +ruler: + alertmanager_url: 'http://alertmanager:9093' + rule_path: /etc/loki/rules + storage: + type: local + local: + directory: /etc/loki/rules + enable_api: true + remote_write: + enabled: true + client: + url: 'http://prometheus-remote:9090/api/v1/write' + queue_config: + batch_send_deadline: 5s + capacity: 2500 + max_samples_per_send: 1000 + max_shards: 200 + min_shards: 1 + config_refresh_period: 10s + add_org_id_header: true + +analytics: + reporting_enabled: false + diff --git a/configs/loki/recording-rules.yaml b/configs/loki/recording-rules.yaml new file mode 100644 index 0000000..318d08c --- /dev/null +++ b/configs/loki/recording-rules.yaml @@ -0,0 +1,14 @@ +groups: + - name: log-recording-rules + interval: 1m + rules: + - record: instance:service_loki_log_rate:sum_rate1m + expr: sum(rate({service_name="loki"}[1m])) + labels: + cluster: local + service_name: loki + - record: instance:service_log_bytes:sum_rate1m + expr: | + sum(bytes_over_time({job="containerlogs", service_name=~".+"}[1m])) by (service_name) + labels: + cluster: local diff --git a/configs/prometheus-remote/prometheus.yml b/configs/prometheus-remote/prometheus.yml new file mode 100644 index 0000000..5b1e73b --- /dev/null +++ b/configs/prometheus-remote/prometheus.yml @@ -0,0 +1,9 @@ +global: + scrape_interval: 15s + external_labels: + cluster: remote + +scrape_configs: + - job_name: 'prometheus-remote' + static_configs: + - targets: ['localhost:9090'] diff --git a/configs/prometheus/alerting-rules.yml b/configs/prometheus/alerting-rules.yml index d330687..16ab0d2 100644 --- a/configs/prometheus/alerting-rules.yml +++ b/configs/prometheus/alerting-rules.yml @@ -1,5 +1,5 @@ groups: -- name: node-exporter +- name: node-exporter-alerting-rules rules: - alert: NodeOutOfMemory annotations: diff --git a/configs/prometheus/prometheus-remotewrite.yml 
b/configs/prometheus/prometheus-remotewrite.yml new file mode 100644 index 0000000..444fe63 --- /dev/null +++ b/configs/prometheus/prometheus-remotewrite.yml @@ -0,0 +1,54 @@ +global: + scrape_interval: 5s + external_labels: + cluster: local + +rule_files: + - /etc/prometheus/recording-rules.yml + - /etc/prometheus/alerting-rules.yml + +alerting: + alertmanagers: + - scheme: http + static_configs: + - targets: ['alertmanager:9093'] + +scrape_configs: + - job_name: prometheus + static_configs: + - targets: [ 'localhost:9090' ] + labels: + container: 'prometheus' + + - job_name: alertmanager + static_configs: + - targets: [ 'alertmanager:9093' ] + labels: + container: 'alertmanager' + + - job_name: node-exporter + static_configs: + - targets: [ 'node-exporter:9100' ] + labels: + container: 'node-exporter' + + - job_name: cadvisor + static_configs: + - targets: [ 'cadvisor:8080' ] + labels: + container: 'cadvisor' + + - job_name: loki + static_configs: + - targets: [ 'loki:3100' ] + labels: + container: 'loki' + +remote_write: + - url: 'http://prometheus-remote:9090/api/v1/write' + queue_config: + max_samples_per_send: 1000 + batch_send_deadline: 5s + max_shards: 200 + min_shards: 1 + capacity: 2500 diff --git a/configs/prometheus/prometheus.yml b/configs/prometheus/prometheus.yml index 1cdcdee..5a18710 100644 --- a/configs/prometheus/prometheus.yml +++ b/configs/prometheus/prometheus.yml @@ -1,7 +1,7 @@ global: scrape_interval: 5s external_labels: - namespace: local + cluster: local rule_files: - /etc/prometheus/recording-rules.yml @@ -37,3 +37,11 @@ scrape_configs: - targets: [ 'cadvisor:8080' ] labels: container: 'cadvisor' + + - job_name: loki + static_configs: + - targets: [ 'loki:3100' ] + labels: + container: 'loki' + +# remote_write: see ./configs/prometheus/prometheus-remotewrite.yml diff --git a/configs/prometheus/recording-rules.yml b/configs/prometheus/recording-rules.yml index 0f64ed1..4b56b2b 100644 --- a/configs/prometheus/recording-rules.yml +++
b/configs/prometheus/recording-rules.yml @@ -1,5 +1,5 @@ groups: -- name: prometheus_node_exporter_rules +- name: node-exporter-recording-rules rules: - expr: count without (cpu) (count without (mode) (node_cpu_seconds_total{job="node-exporter"})) record: instance:node_num_cpu:sum diff --git a/docker-compose.remote.yml b/docker-compose.remote.yml new file mode 100644 index 0000000..87ca39c --- /dev/null +++ b/docker-compose.remote.yml @@ -0,0 +1,50 @@ +version: '3.8' + +x-logging: + &default-logging + driver: "json-file" + options: + max-size: "1m" + max-file: "1" + tag: "{{.Name}}" + +x-common-labels: &default-labels + logging: "promtail" + logging_jobname: "containerlogs" + +services: + loki: + volumes: + - ./configs/loki/recording-rules.yaml:/etc/loki/rules/fake/recording-rules.yml + + prometheus-remote: + image: prom/prometheus:${PROMETHEUS_VERSION:-v2.42.0} + container_name: prometheus-remote + restart: unless-stopped + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--log.level=info' + - '--storage.tsdb.path=/prometheus' + - '--storage.tsdb.retention.time=7d' + - '--web.console.libraries=/usr/share/prometheus/console_libraries' + - '--web.console.templates=/usr/share/prometheus/consoles' + - '--web.enable-lifecycle' + - '--web.enable-remote-write-receiver' + - '--web.external-url=http://localhost:9091' + volumes: + - ./configs/prometheus-remote/prometheus.yml:/etc/prometheus/prometheus.yml + - prometheus-remote-data:/prometheus + ports: + - 9091:9090 + networks: + - monitoring + labels: + <<: *default-labels + logging: *default-logging + +volumes: + prometheus-remote-data: {} + +networks: + monitoring: + name: monitoring diff --git a/docker-compose.yml b/docker-compose.yml index 4a65f25..ec40978 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -57,7 +57,7 @@ services: - '--web.console.templates=/usr/share/prometheus/consoles' - '--web.external-url=http://localhost:9090' volumes: -
./configs/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml + - ./configs/prometheus/${PROMETHEUS_CONFIG_FILENAME:-prometheus}.yml:/etc/prometheus/prometheus.yml - ./configs/prometheus/recording-rules.yml:/etc/prometheus/recording-rules.yml - ./configs/prometheus/alerting-rules.yml:/etc/prometheus/alerting-rules.yml - prometheus-data:/prometheus @@ -148,8 +148,8 @@ services: - 3100:3100 command: -config.file=/etc/loki/loki.yaml volumes: - - ./configs/loki/loki.yaml:/etc/loki/loki.yaml - - ./configs/loki/rules.yaml:/etc/loki/rules/fake/loki-rules.yml + - ./configs/loki/${LOKI_CONFIG_FILENAME:-loki}.yaml:/etc/loki/loki.yaml + - ./configs/loki/alerting-rules.yaml:/etc/loki/rules/fake/alerting-rules.yml networks: - monitoring labels: