Page MenuHomePhorge

No OneTemporary

Authored By
Unknown
Size
17 KB
Referenced Files
None
Subscribers
None
diff --git a/helm/templates/prometheus-configmap.yaml b/helm/templates/prometheus-configmap.yaml
index 870c99c..5375b16 100644
--- a/helm/templates/prometheus-configmap.yaml
+++ b/helm/templates/prometheus-configmap.yaml
@@ -1,423 +1,425 @@
{{- if .Values.prometheus.enabled -}}
# ConfigMap "prom-config": only rendered when .Values.prometheus.enabled is set.
kind: ConfigMap
apiVersion: v1
metadata:
name: prom-config
data:
prometheus.yml: |
global:
scrape_interval: 1m
evaluation_interval: 1m
# external_labels are added to series/alerts when talking to external systems
# (e.g. Alertmanager). The value is quoted, like the "prometheus" label of the
# Watchdog rule, so the expanded domain name is always read as a YAML string.
+ external_labels:
+ deployment: "{{ .Values.domainName }}"
{{- if .Values.alertmanager.externalUrl }}
# An Alertmanager target is only configured when alertmanager.externalUrl is set;
# it is reached using alertmanager.externalUrlScheme.
alerting:
alertmanagers:
- static_configs:
- targets:
- {{ .Values.alertmanager.externalUrl }}
scheme: {{ .Values.alertmanager.externalUrlScheme }}
{{- end }}
# Rule files matching this glob are loaded.
rule_files:
- /config/prometheus/rules/*.yml
scrape_configs:
# Metrics from the kolab services endpoint (single static target on port 80).
- job_name: 'kolab'
static_configs:
- targets: ['{{ .Values.kolab.servicesService }}:80']
metrics_path: "/api/webhooks/metrics"
# Swoole metrics, scraped per pod discovered in the "kolab" namespace.
- job_name: 'swoole'
kubernetes_sd_configs:
- role: pod
namespaces:
names:
- kolab
relabel_configs:
# Keep only pods whose "app" label is kolab or kolab-services.
- source_labels: [__meta_kubernetes_pod_label_app]
action: keep
regex: kolab|kolab-services
# Scrape the pod IP directly.
- source_labels: [__meta_kubernetes_pod_ip]
target_label: __address__
# Copy the pod name into the kubernetes_pod_name label
# (via the intermediate __param_kubernetes_pod_name label).
- target_label: __param_kubernetes_pod_name
source_labels: [__meta_kubernetes_pod_name]
- target_label: kubernetes_pod_name
source_labels: [__param_kubernetes_pod_name]
- target_label: __metrics_path__
replacement: /api/webhooks/metrics/swoole
# Append port 8000 to the address set above.
- source_labels: [__address__]
action: replace
replacement: $1:8000
target_label: __address__
{{- if .Values.imap.enabled }}
# IMAP metrics. With imap.murder.enabled one target per replica index
# (0 .. imap.replicas-1) is generated on port 11080 of the headless service;
# otherwise "imap:80" is scraped, plus "imap-replica:80" when the replica is enabled.
- job_name: 'imap'
# We update slower than scrape interval, so the timestamps are usually outdated.
# Also, the cyrus exporter tends to get stuck.
honor_timestamps: false
static_configs:
- targets:
{{- if .Values.imap.murder.enabled }}
{{- range (untilStep 0 (int .Values.imap.replicas) 1) }}
- 'imap-{{ . }}.imap-headless:11080'
{{- end }}
{{- else }}
- 'imap:80'
{{- if .Values.imap.replica.enabled }}
- 'imap-replica:80'
{{- end }}
{{- end }}
metrics_path: "/metrics"
{{- end }}
# Pushgateway; honor_labels keeps the labels carried by the pushed metrics
# instead of overwriting them with the scrape target's labels.
- job_name: "pushgateway"
honor_labels: true
static_configs:
- targets: ['pushgateway:9091']
{{- if .Values.prometheus.clusterMonitoring }}
# Cluster-level jobs below are only rendered when prometheus.clusterMonitoring is set
# (the conditional is closed after the cadvisor job).
# Vector metrics: pods in kube-system labelled app=vector, scraped on port 9598 at /metrics.
- job_name: 'vector-node-metrics'
kubernetes_sd_configs:
- role: pod
namespaces:
names:
- kube-system
relabel_configs:
- source_labels: [__meta_kubernetes_pod_label_app]
action: keep
regex: vector
- source_labels: [__meta_kubernetes_pod_ip]
target_label: __address__
# Copy the pod name into the kubernetes_pod_name label.
- target_label: __param_kubernetes_pod_name
source_labels: [__meta_kubernetes_pod_name]
- target_label: kubernetes_pod_name
source_labels: [__param_kubernetes_pod_name]
- target_label: __metrics_path__
replacement: /metrics
- source_labels: [__address__]
action: replace
replacement: $1:9598
target_label: __address__
# kube-state-metrics is addressed by its service DNS name in the "default" namespace.
- job_name: 'kube-state-metrics'
static_configs:
- targets: ['kube-state-metrics.default.svc.cluster.local:8080']
# Kubelet metrics: one target per discovered node, scraped over HTTPS on port 10250
# at /metrics, authenticated with the pod's service account token.
- job_name: 'kubelet metrics'
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
# Copy all node labels onto the target, then drop the kubernetes.io-style ones again.
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- action: labeldrop
regex: beta_kubernetes_io(.+)
- action: labeldrop
regex: kubernetes_io(.+)
- action: labeldrop
regex: node_kubernetes_io(.+)
- action: labeldrop
regex: node_role_kubernetes_io(.+)
# Replace whatever port the discovered address has (if any) with 10250.
- source_labels: [__address__]
target_label: __address__
regex: ([^:]+)(?::\d+)?
replacement: $1:10250
- regex: (.+)
replacement: /metrics
source_labels: [__meta_kubernetes_node_name]
target_label: __metrics_path__
metric_relabel_configs:
# Drop the most expensive metrics that we don't look at
- source_labels: [__name__]
regex: (.+)_bucket|apiserver_(.+)|etcd_(.+)
action: drop
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# NOTE(review): with insecure_skip_verify the server certificate is not verified,
# so ca_file presumably has no effect here - confirm this is intended.
insecure_skip_verify: true
# cAdvisor metrics: same node discovery and relabeling as the kubelet job,
# but scraped from /metrics/cadvisor.
- job_name: 'cadvisor metrics'
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
# Copy all node labels onto the target, then drop the kubernetes.io-style ones again.
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- action: labeldrop
regex: beta_kubernetes_io(.+)
- action: labeldrop
regex: kubernetes_io(.+)
- action: labeldrop
regex: node_kubernetes_io(.+)
- action: labeldrop
regex: node_role_kubernetes_io(.+)
# Replace whatever port the discovered address has (if any) with 10250.
- source_labels: [__address__]
target_label: __address__
regex: ([^:]+)(?::\d+)?
replacement: $1:10250
- regex: (.+)
replacement: /metrics/cadvisor
source_labels: [__meta_kubernetes_node_name]
target_label: __metrics_path__
metric_relabel_configs:
# Drop the most expensive metrics that we don't look at
- source_labels: [__name__]
regex: container_tasks_state|container_fs_(.+)|container_blkio_(.+)
action: drop
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
# NOTE(review): with insecure_skip_verify the server certificate is not verified,
# so ca_file presumably has no effect here - confirm this is intended.
insecure_skip_verify: true
{{- end }}
{{- if .Values.prometheus.haproxyMonitoring }}
# HAProxy: one static target per entry in prometheus.haproxyMonitoring.targets.
- job_name: 'haproxy-metrics'
static_configs:
- targets:
{{- range .Values.prometheus.haproxyMonitoring.targets }}
- {{ . }}
{{- end }}
{{- end }}
{{- if .Values.prometheus.mariadbMonitoring }}
# MariaDB: each entry of prometheus.mariadbMonitoring.targets is passed as the
# "target" URL parameter and kept as the "instance" label, while the scrape itself
# goes to the exporter at localhost:9104.
- job_name: mariadb
static_configs:
- targets:
{{- range .Values.prometheus.mariadbMonitoring.targets }}
- {{ . }}
{{- end }}
relabel_configs:
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- target_label: __address__
replacement: localhost:9104
{{- end }}
# TLS probe of proxy:993 through the blackbox exporter at localhost:9115,
# using the tcp_tls module defined in blackbox.yml.
- job_name: blackbox-tcp-tls
honor_timestamps: true
params:
module: [tcp_tls]
metrics_path: /probe
scheme: http
follow_redirects: true
enable_http2: true
relabel_configs:
# The probed address becomes the "target" parameter and the "instance" label,
# the module name becomes the "module" label, and the scrape is redirected
# to the exporter itself.
- source_labels: [__address__]
target_label: __param_target
- source_labels: [__param_target]
target_label: instance
- source_labels: [__param_module]
target_label: module
- target_label: __address__
replacement: localhost:9115
static_configs:
- targets:
- proxy:993
# Disabled: equivalent probe of proxy:587 using the smtp_starttls module.
# - job_name: blackbox-smtp-starttls
# metrics_path: /probe
# params:
# module: [smtp_starttls]
# relabel_configs:
# - source_labels: [__address__]
# target_label: __param_target
# - source_labels: [__param_target]
# target_label: instance
# - source_labels: [__param_module]
# target_label: module
# - target_label: __address__
# replacement: localhost:9115
# static_configs:
# - targets:
# - proxy:587
{{- if .Values.loki.enabled }}
# Loki's own metrics; only series named loki_internal_log_* are kept.
- job_name: 'loki'
static_configs:
- targets: ['loki.kolab.svc.cluster.local:3100']
metrics_path: "/metrics"
metric_relabel_configs:
# Drop most metrics
- source_labels: [__name__]
regex: loki_internal_log_(.+)
action: keep
{{- end }}
{{- if .Values.victorialogs.enabled }}
# VictoriaLogs' own metrics (no filtering).
- job_name: 'victorialogs'
static_configs:
- targets: ['victorialogs.kolab.svc.cluster.local:9428']
metrics_path: "/metrics"
{{- end }}
blackbox.yml: |
# Blackbox exporter probe modules. All HTTP modules use GET, a 5s timeout and
# IPv4 only (no fallback).
modules:
# Plain HTTP(S) probe; certificate verification is skipped.
http_2xx:
prober: http
timeout: 5s
http:
valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
valid_status_codes: []
method: GET
preferred_ip_protocol: "ip4"
ip_protocol_fallback: false
tls_config:
insecure_skip_verify: true
# Probe that fails unless the connection uses TLS.
https_2xx:
prober: http
timeout: 5s
http:
valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
valid_status_codes: []
fail_if_not_ssl: true
method: GET
preferred_ip_protocol: "ip4"
ip_protocol_fallback: false
# TLS probe that expects a 403 response.
https_403:
prober: http
timeout: 5s
http:
valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
valid_status_codes: [403]
fail_if_not_ssl: true
method: GET
preferred_ip_protocol: "ip4"
ip_protocol_fallback: false
# Like http_2xx, but authenticating with the monitoring1 service account.
http_2xx_auth:
prober: http
timeout: 5s
http:
valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
valid_status_codes: []
method: GET
preferred_ip_protocol: "ip4"
ip_protocol_fallback: false
basic_auth:
username: "{{ .Values.serviceAccounts.monitoring1.user }}"
password: "{{ .Values.serviceAccounts.monitoring1.password }}"
tls_config:
insecure_skip_verify: true
# Like https_2xx, but authenticating with the monitoring1 service account.
https_2xx_auth:
prober: http
timeout: 5s
http:
valid_http_versions: ["HTTP/1.1", "HTTP/2.0"]
valid_status_codes: []
fail_if_not_ssl: true
method: GET
preferred_ip_protocol: "ip4"
ip_protocol_fallback: false
basic_auth:
username: "{{ .Values.serviceAccounts.monitoring1.user }}"
password: "{{ .Values.serviceAccounts.monitoring1.password }}"
# ICMP probe, IPv4 only.
icmp:
prober: icmp
icmp:
preferred_ip_protocol: "ip4"
ip_protocol_fallback: false
# TCP probe over TLS; the certificate is checked against the website domain and,
# when tlsSecret.ca is set, against the CA file at /etc/certs/ca.cert.
tcp_tls:
prober: tcp
timeout: 5s
tcp:
tls: true
tls_config:
server_name: "{{ template "kolab.websiteDomain" . }}"
{{- if .Values.tlsSecret.ca }}
ca_file: /etc/certs/ca.cert
{{- end }}
# IMAP probe: waits for a greeting advertising STARTTLS, upgrades the connection,
# then expects an IMAP4rev1 capability response.
imap_starttls:
prober: tcp
timeout: 5s
tcp:
query_response:
- expect: "OK.*STARTTLS"
- send: ". STARTTLS"
- expect: "OK"
- starttls: true
- send: ". capability"
- expect: "CAPABILITY IMAP4rev1"
# SMTP probe: EHLO, STARTTLS upgrade, EHLO again expecting AUTH to be offered, then QUIT.
smtp_starttls:
prober: tcp
timeout: 5s
tcp:
query_response:
- expect: "^220 ([^ ]+) ESMTP (.+)$"
- send: "EHLO prober\r"
- expect: "^250-STARTTLS"
- send: "STARTTLS\r"
- expect: "^220"
- starttls: true
- send: "EHLO prober\r"
- expect: "^250-AUTH"
- send: "QUIT\r"
tls_config:
server_name: "{{ template "kolab.websiteDomain" . }}"
{{- if .Values.tlsSecret.ca }}
ca_file: /etc/certs/ca.cert
{{- end }}
dashboard.html: |
{{ .Files.Get "files/dashboard.html" | indent 4}}
prometheus.rules.yml: |
# Alerting rules. {{ "{{" }} ... {{ "}}" }} emits literal braces, so the $labels
# placeholders are left for Prometheus to expand rather than Helm.
groups:
- name: prometheus-checks
rules:
# Always-firing alert (vector(1)) used to prove the alerting pipeline works.
- alert: Watchdog
expr: vector(1)
labels:
severity: none
prometheus: "{{ .Values.domainName }}"
annotations:
summary: An alert that should always be firing to certify that Alertmanager is working properly.
# Fires when a scrape target has been down for 5 minutes.
# NOTE(review): no relabel rule visible in this file sets a "hostname" label,
# so $labels.hostname may render empty - confirm the targets carry it.
- alert: scrape_job_down
expr: up == 0
for: 5m
labels:
severity: warning
annotations:
summary: Scrape job {{ "{{" }} $labels.job {{ "}}" }} down on {{ "{{" }} $labels.hostname {{ "}}" }}.
{{- if .Values.prometheus.mariadbMonitoring }}
# Alerts on the mysql_* metrics; only rendered when prometheus.mariadbMonitoring is set.
# Every rule must hold for 1 minute and is raised as critical.
- name: mysqld-checks
rules:
# The exporter cannot reach the database server.
- alert: MysqlNodeIsDown
annotations:
summary: A mysql node is down on {{ "{{" }} $labels.hostname {{ "}}" }}.
expr: mysql_up == 0
for: 1m
labels:
severity: critical
# The node is up but reports wsrep_connected == 0.
- alert: WSREPNotConnected
annotations:
summary: A mysql node is not connected to the wsrep cluster on {{ "{{" }} $labels.hostname {{ "}}" }}.
expr: mysql_global_status_wsrep_connected == 0
for: 1m
labels:
severity: critical
# The expected cluster size (4) is hard-coded in both the summary and the expression.
- alert: WSREPClusterSize
annotations:
summary: The mariadb cluster does not have all 4 participants.
expr: mysql_global_status_wsrep_cluster_size != 4
for: 1m
labels:
severity: critical
- alert: WSREPClusterStatus
annotations:
summary: Cluster status not ok.
expr: mysql_global_status_wsrep_cluster_status == 0
for: 1m
labels:
severity: critical
{{- end }}
{{- if .Values.imap.replica.enabled }}
# Compares the mailbox count reported by the primary ("imap:*") with the one
# reported by the replica ("imap-replica:*"), ignoring the instance label.
# The alert must fire when the counts DIFFER, hence "!= 0"; comparing with
# "== 0" would fire only while both sides are in sync.
- alert: IMAPReplicaOutOfSync
annotations:
summary: The IMAP replica is not in sync with the primary.
expr: abs(cyrus_usage_mailboxes{instance=~"imap:.*"} - ignoring (instance) cyrus_usage_mailboxes{instance=~"imap-replica:.*"}) != 0
for: 1m
labels:
severity: critical
{{- end }}
# TODO for imap murder ensure the folder count matches
web.rules.yml: |
{{ .Files.Get "files/web.rules.yml" | indent 4}}
{{- if .Values.prometheus.clusterMonitoring }}
cluster.rules.yml: |
{{ .Files.Get "files/cluster.rules.yml" | indent 4}}
{{- end }}
{{- end }}
diff --git a/helm/templates/victorialogs-deployment.yaml b/helm/templates/victorialogs-deployment.yaml
index d412e23..4540dcc 100644
--- a/helm/templates/victorialogs-deployment.yaml
+++ b/helm/templates/victorialogs-deployment.yaml
@@ -1,79 +1,80 @@
{{- if .Values.victorialogs.enabled -}}
# Deployment running VictoriaLogs together with a vmalert container;
# only rendered when .Values.victorialogs.enabled is set.
apiVersion: apps/v1
kind: Deployment
metadata:
annotations:
alpha.image.policy.openshift.io/resolve-names: '*'
labels:
app: victorialogs
name: victorialogs
spec:
progressDeadlineSeconds: 600
replicas: 1
revisionHistoryLimit: 10
# Pods are selected by the "deployment" label set in the pod template below.
selector:
matchLabels:
deployment: victorialogs
strategy:
type: Recreate
template:
metadata:
annotations:
# Hash of the rendered victorialogs-configmap.yaml: the pod template changes
# (and pods are replaced) whenever that config changes.
checksum/config: {{ include (print $.Template.BasePath "/victorialogs-configmap.yaml") . | sha256sum }}
labels:
deployment: victorialogs
# Presumably keeps Vector from collecting this pod's logs - verify against the Vector setup.
vector.dev/exclude: "true"
spec:
containers:
# Log storage container, listening on 9428/TCP.
- name: victorialogs
image: {{ .Values.image.victorialogsImage }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- containerPort: 9428
protocol: TCP
args:
# NOTE(review): relative path; presumably resolves to the /data volume mount
# below because the image runs from "/" - confirm.
- -storageDataPath=data
# - -logIngestedRows
# - -logNewStreams
- -retention.maxDiskUsagePercent=80
- -memory.allowedPercent=60
# Resource requests/limits are taken verbatim from .Values.victorialogs.resources.
resources:
{{- toYaml .Values.victorialogs.resources | nindent 10 }}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /data
name: victorialogs-data
# vmalert container: evaluates the rules in /config/rules.yaml (from config-volume)
# against VictoriaLogs and sends alerts to Alertmanager.
- name: vmalert
image: {{ .Values.image.vmalertImage }}
imagePullPolicy: {{ .Values.image.pullPolicy }}
args:
- -rule=/config/rules.yaml
- -datasource.url=http://victorialogs:9428
# NOTE(review): the host falls back to "alertmanager:9093" when externalUrl is unset,
# but externalUrlScheme has no fallback here - confirm it always has a value that
# suits the in-cluster default.
- -notifier.url={{ .Values.alertmanager.externalUrlScheme }}://{{ .Values.alertmanager.externalUrl | default "alertmanager:9093" }}
- -remoteWrite.url=http://prometheus:9090
- -remoteRead.url=http://prometheus:9090
# Adds a deployment=<domainName> label, mirroring the external label set in prometheus.yml.
+ - -external.label=deployment={{ .Values.domainName }}
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /config/
name: config-volume
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
terminationGracePeriodSeconds: 30
volumes:
# Persistent storage for VictoriaLogs, backed by the PVC rendered at the end of this file.
- name: victorialogs-data
persistentVolumeClaim:
claimName: victorialogs-data
# Only the rules.yaml key of the victorialogs-config ConfigMap is projected.
- name: config-volume
configMap:
# 420 decimal == 0644 octal
defaultMode: 420
name: victorialogs-config
items:
- key: rules.yaml
path: rules.yaml
---
{{- include "kolab.pvc" .Values.victorialogs.volumes.data }}
{{- end }}

File Metadata

Mime Type
text/x-diff
Expires
Mon, Apr 6, 1:01 AM (3 d, 22 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
18824242
Default Alt Text
(17 KB)

Event Timeline