Compare commits

..

No commits in common. "79699ed2c610a74c4603b975573a0a22b53a633c" and "52eece85e43e662d5a48fa00da03493fc560dd7d" have entirely different histories.

2 changed files with 44 additions and 43 deletions

View File

@@ -1,5 +0,0 @@
---
extends: default
rules:
line-length: disable

View File

@@ -19,30 +19,30 @@ prometheus_config:
alerting:
alertmanagers:
- static_configs:
- targets:
- localhost:9093
- targets:
- localhost:9093
scrape_configs:
- job_name: prometheus
scrape_interval: 5s
static_configs:
- targets:
- localhost:9090
- localhost:9090
- job_name: alertmanager
scrape_interval: 5s
static_configs:
- targets:
- localhost:9093
- localhost:9093
- job_name: pushgateway
scrape_interval: 5s
static_configs:
- targets:
- jump0.kill0.net:9091
- jump0.kill0.net:9091
- job_name: node
scrape_interval: 5s
static_configs:
- targets:
- jump0.kill0.net:9100
- mine0.kill0.net:9100
- jump0.kill0.net:9100
- mine0.kill0.net:9100
relabel_configs:
- source_labels: [__address__]
target_label: instance
@@ -52,8 +52,8 @@ prometheus_config:
scrape_interval: 5s
static_configs:
- targets:
- jump0.kill0.net:3903
- mine0.kill0.net:3903
- jump0.kill0.net:3903
- mine0.kill0.net:3903
relabel_configs:
- source_labels: [__address__]
target_label: instance
@@ -63,8 +63,8 @@ prometheus_config:
scrape_interval: 5s
static_configs:
- targets:
- jump0.kill0.net:9115
- mine0.kill0.net:9115
- jump0.kill0.net:9115
- mine0.kill0.net:9115
- job_name: blackbox-icmp4
metrics_path: /probe
params:
@@ -72,13 +72,13 @@ prometheus_config:
- icmpv4
static_configs:
- targets:
- dns.google
- vpn-home.kill0.net
- ping-home.kill0.net
- 10.255.0.16
- vpn1-sch.corp.nmi.com
- vpn-chi.ops.nmi.com
- vpn-ash.ops.nmi.com
- dns.google
- vpn-home.kill0.net
- ping-home.kill0.net
- 10.255.0.16
- vpn1-sch.corp.nmi.com
- vpn-chi.ops.nmi.com
- vpn-ash.ops.nmi.com
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
@@ -93,8 +93,8 @@ prometheus_config:
- icmpv6
static_configs:
- targets:
- dns.google
- ping-home.kill0.net
- dns.google
- ping-home.kill0.net
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
@@ -109,7 +109,7 @@ prometheus_config:
- tcp_connect4
static_configs:
- targets:
- mine0.kill0.net:25565
- mine0.kill0.net:25565
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
@@ -124,7 +124,7 @@ prometheus_config:
- tcp_connect6
static_configs:
- targets:
- mine0.kill0.net:25565
- mine0.kill0.net:25565
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
@@ -139,9 +139,9 @@ prometheus_config:
- http_2xx
static_configs:
- targets:
- https://cavi.cc
- https://git.kill0.net
- https://stats.kill0.net
- https://cavi.cc
- https://git.kill0.net
- https://stats.kill0.net
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
@@ -152,23 +152,23 @@ prometheus_config:
- job_name: thanos-sidecar
scrape_interval: 5s
static_configs:
- targets:
- "localhost:10902"
- targets:
- "localhost:10902"
- job_name: thanos-query
scrape_interval: 5s
static_configs:
- targets:
- "localhost:10904"
- targets:
- "localhost:10904"
- job_name: thanos-store
scrape_interval: 5s
static_configs:
- targets:
- "localhost:10902"
- targets:
- "localhost:10902"
- job_name: thanos-compact
scrape_interval: 5s
static_configs:
- targets:
- "localhost:10912"
- targets:
- "localhost:10912"
rule_files:
- rules.yaml
@@ -238,20 +238,23 @@ prometheus_rules_config:
expr: (time() - node_restic_last_run_time{restic_job=~"minecraft"}) > 86400
for: 2h
- alert: MinecraftUnitInactive
expr: node_systemd_unit_state{name="minecraft.service",state="inactive"} == 1
expr: node_systemd_unit_state{name="minecraft.service",state="inactive"} == 1
for: 15m
- alert: GiteaUnitInactive
expr: node_systemd_unit_state{name="gitea.service",state="inactive"} == 1
expr: node_systemd_unit_state{name="gitea.service",state="inactive"} == 1
for: 15m
- alert: MaintenanceMode
expr: maintenance_mode == 1
for: 1m
#- alert: QuietHours
# expr: america_chicago_hour >= 22 or america_chicago_hour < 10
# for: 1m
- name: blackbox.rules
rules:
- alert: ServiceDown
expr: probe_success{job!~"blackbox-icmp[0-9]"} == 0
for: 1m
- alert: PingDown
- alert: PingDown
expr: probe_success{job=~"blackbox-icmp[0-9]"} == 0
for: 15s
- alert: CertExpiry
@@ -263,7 +266,7 @@ prometheus_rules_config:
# summary: Certificates expiring in < 30 days
summary: "{% raw %}Blackbox SSL certificate will expire soon (instance {{ $labels.instance }}){% endraw %}"
description: "{% raw %}SSL certificate expires in 30 days\n VALUE = {{ $value }}\n LABELS = {{ $labels }}{% endraw %}"
- alert: CertExpiry
- alert: CertExpiry
expr: ((probe_ssl_earliest_cert_expiry{job="blackbox-http"} - time()) / 86400) < 14
for: 15s
labels:
@@ -357,7 +360,7 @@ alertmanager_config:
- times:
- start_time: "03:00"
end_time: "15:00"
node_exporter_du_directories:
- /var/log/syslog
- /var/spool/rsyslog
@@ -407,6 +410,9 @@ karma_config:
color: "#ff220c"
alertAcknowledgement:
enabled: true
#duration: 15m0s
#author: karma
#comment: ACK! This alert was acknowledged using karma on %NOW%
thanos_bucket_config: "{{ vault_thanos_bucket_config }}"