Page MenuHomePhorge

No OneTemporary

Authored By
Unknown
Size
4 KB
Referenced Files
None
Subscribers
None
diff --git a/helm/files/victorialogs.rules.yml b/helm/files/victorialogs.rules.yml
index 875efbb..f1c1280 100644
--- a/helm/files/victorialogs.rules.yml
+++ b/helm/files/victorialogs.rules.yml
@@ -1,96 +1,101 @@
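- # _time: 5m is implicit in all expressions when the interval is 5m
+ # _time: 5m is implicit in every expression that does not set its own _time filter (the group interval is 5m)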
groups:
- name: ServiceLog
type: vlogs
interval: 5m
rules:
- alert: HasErrorLog
expr: 'env: "prod" AND status:~"error|warn" | stats by (service, kubernetes.pod) count() as errorLog | filter errorLog:>0'
annotations:
description: 'Service {{$labels.service}} (pod {{ index $labels "kubernetes.pod" }}) generated {{$labels.errorLog}} error logs in the last 5 minutes'
- name: ServiceRequest
type: vlogs
interval: 5m
rules:
- alert: TooManyFailedRequest
expr: '* | extract "ip=<ip> " | extract "status_code=<code>;" | stats by (ip) count() if (code:~4.*) as failed, count() as total| math failed / total as failed_percentage| filter failed_percentage :> 0.01 | fields ip,failed_percentage'
annotations:
description: "Connection from address {{$labels.ip}} has {{$value}}% failed requests in last 5 minutes"
- name: roundcube_rules
type: vlogs
interval: 5m
rules:
- alert: Roundcube php fatal error
annotations:
description: "{{ $labels.pod_name }} encountered {{$value}} php fatal errors in the last 5 minutes."
expr: '{pod_name=~"roundcube.*"} "PHP Fatal error" | stats by (pod_name) count(*)'
labels:
severity: warning
- record: kolab:roundcube:fatalerror:rate5m
expr: '{pod_name=~"roundcube.*"} "PHP Fatal error" | stats by (pod_name) count(*)'
- alert: Database error
annotations:
description: "{{ $labels.pod_name }} encountered {{$value}} db errors in the last 5 minutes."
expr: '{pod_name=~"roundcube.*"} "DB Error" | stats by (pod_name) count(*)'
labels:
severity: warning
- record: kolab:roundcube:dberror:rate5m
expr: '{pod_name=~"roundcube.*"} "DB Error" | stats by (pod_name) count(*)'
- alert: Roundcube exception
annotations:
- description: "{{ $labels.pod_name }} encountered {{$value}} execptions in the last 5 minutes."
+ description: "{{ $labels.pod_name }} encountered {{$value}} exceptions in the last 5 minutes."
expr: '{pod_name=~"roundcube.*"} "exception message" | stats by (pod_name) count(*)'
labels:
severity: warning
- record: kolab:roundcube:exception:rate5m
expr: '{pod_name=~"roundcube.*"} "exception message" | stats by (pod_name) count(*)'
- name: horizon_rules
type: vlogs
interval: 5m
rules:
- alert: Horizon error
annotations:
- description: "{{ $labels.pod_name }} encountered {{$value}} an error in the last 5 minutes."
+ description: "{{ $labels.pod_name }} encountered {{$value}} errors in the last 5 minutes."
expr: '{pod_name=~"horizon.*"} "production.ERROR" | stats by (pod_name) count(*)'
labels:
severity: warning
- name: kolab_rules
type: vlogs
interval: 5m
rules:
- record: kolab:error:rate5m
expr: '{pod_name=~"kolab.*"} "production.ERROR" | stats by (pod_name) count(*) '
- record: kolab:authenticate:rate5m
expr: '{pod_name=~"kolab.*"} "POST https://kolab-services:80/api/webhooks/cyrus-sasl" | stats by (pod_name) count(*) as rate'
- alert: Kolab error
annotations:
- description: "{{ $labels.pod_name }} encountered {{$value}} an error in the last 5 minutes."
+ description: "{{ $labels.pod_name }} encountered {{$value}} errors in the last 5 minutes."
expr: '{pod_name=~"kolab.*"} "production.ERROR" | stats by (pod_name) count(*)'
labels:
severity: warning
- name: postfix_rules
type: vlogs
interval: 5m
rules:
- record: kolab:postfix:rejection:rate5m
expr: |
pod_name:postfix* | "refused to talk to me" | stats by (pod_name) count(*)
# - alert: Postfix rejections
# annotations:
# description: "{{ $value }} messages were rejected."
- # expr: 'postfix:rejection:rate5m > 5'
+ # expr: 'kolab:postfix:rejection:rate5m > 5'
# labels:
# severity: warning
- name: imap_rules
type: vlogs
interval: 5m
rules:
- record: kolab:imap:login:rate5m
expr: |
pod_name:imap* | "User logged in" | stats by (pod_name) count(*) as rate
+ - alert: IMAP error message
+ annotations:
+ description: "{{ $labels.pod_name }} encountered {{$value}} errors in the last 60 minutes."
+ expr: |
+ _time: 60m pod_name:imap* facility:"local6.err" | stats by (pod_name) count(*)

File Metadata

Mime Type
text/x-diff
Expires
Sat, Apr 4, 8:48 AM (2 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
18823324
Default Alt Text
(4 KB)

Event Timeline