Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Update the environments as required, specifying for each one the file paths of its environment and secrets files and the namespace to be used.

In the config below, "demo" is the environment; it sets a default namespace and lists the environment files to load.

Code Block
languagebash
# deploy-as-code/helm/charts/monitoring/monitoring-helmfile.yaml

...

Code Block
languagebash

# Helmfile environments; each key below names one environment.
environments:
  # "demo" environment.
  demo:
    # Values for this environment: an inline default namespace followed by
    # the environment file and the secrets file.
    values:
      - namespace: monitoring
      # NOTE(review): paths look relative to this helmfile's directory — confirm.
      - ../../environments/egov-demo.yaml
      - ../../environments/egov-demo-secrets.yaml

...

  1. Create an AWS Web Identity (OIDC) IAM role with the following policy.

    Code Block
    languagejson
    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Sid": "AccessToLokiBucket",
                "Effect": "Allow",
                "Action": [
                    "s3:PutObject",
                    "s3:GetObject",
                    "s3:DeleteObject",
                    "s3:ListBucket"
                ],
                "Resource": [
                    "arn:aws:s3:::<s3-bucket>",
                    "arn:aws:s3:::<s3-bucket>/*"
                ]
            }
        ]
    }
    

  2. Update the S3 details & role ARN in the config below.

    Code Block
    languageyaml
    # deploy-as-code/helm/environments/egov-demo.yaml
    # Loki overrides: persistence, the service-account role annotation, and
    # boltdb-shipper index storage with S3 as the shared object store.
    loki:
      persistence:
        enabled: true
        accessModes:
          - ReadWriteOnce
        size: 10Gi
      serviceAccount:
        annotations:
          eks.amazonaws.com/role-arn: <s3-role-arn>    ## AWS ARN of the s3 role
      additionalConfigs:
        schema_config:
          configs:
            - from: "2020-10-24"                       ## quoted so it stays a string, not a YAML date
              store: boltdb-shipper
              object_store: s3                         ## AWS s3 as storage
              schema: v11
              index:
                prefix: index_
                period: 24h
        storage_config:
          boltdb_shipper:
            active_index_directory: /data/loki/index
            cache_location: /data/loki/index_cache
            shared_store: s3                           ## AWS s3 as storage
            cache_ttl: 24h
          aws:
            s3: s3://<region>/<s3-bucket>              ## s3 region & bucket
        compactor:
          working_directory: /data/loki/boltdb-shipper-compactor
          shared_store: s3                             ## AWS s3 as storage
          retention_enabled: true
          compaction_interval: 168h                    ## compaction interval in hours
        table_manager:
          retention_deletes_enabled: true
          retention_period: 168h                       ## retention in hours
    

...

Code Block
languageyaml
# deploy-as-code/helm/environments/egov-demo.yaml
# Prometheus overrides: an external "cluster" label plus extra scrape jobs
# for the nginx ingress metrics endpoint and the blackbox exporter.
prometheus:
  externalLabels:
    cluster: <cluster-name>                          ## provide cluster name
  additionalScrapeConfigs:
    - job_name: 'nginx-ingress-metrics'
      static_configs:
        - targets: [ 'nginx-ingress-controller-metrics.egov:10254' ]
    # Probes each listed URL through the blackbox exporter's /probe endpoint.
    - job_name: 'blackbox'
      metrics_path: /probe
      params:
        module: [ http_2xx ]
      static_configs:
        - targets:
            - <list of urls to be monitored>         ### add all URLs to monitor
      relabel_configs:
        # Pass the original target as the ?target= parameter, keep it as the
        # "instance" label, then send the scrape itself to the exporter.
        - source_labels: [ __address__ ]
          target_label: __param_target
        - source_labels: [ __param_target ]
          target_label: instance
        - target_label: __address__
          replacement: prometheus-blackbox-exporter:9115
    # Scrapes the blackbox exporter's own endpoint directly.
    - job_name: 'blackbox_exporter'
      static_configs:
        - targets: [ 'prometheus-blackbox-exporter:9115' ]

...

Code Block
languageyaml
# deploy-as-code/helm/environments/egov-demo-secrets.yaml
# Alertmanager config that sends warning/critical alerts to a Slack channel.
cluster-configs:
  secrets:
    alertmanager:
      config:
        global:
          slack_api_url: https://hooks.slack.com     ## slack webhook URL
          resolve_timeout: 5m
        # NOTE(review): no default `receiver` is set on this root route —
        # confirm the chart supplies one, since Alertmanager requires it.
        route:
          group_by: ['alertname']
          group_wait: 30s
          group_interval: 5m
          repeat_interval: 10m
          routes:
          - receiver: slack-notification
            # match_re, not match: the value is a regex alternation, and
            # `match` only does exact string equality.
            match_re:
              severity: "warning|critical"
            continue: true
        receivers:
        - name: slack-notification
          slack_configs:
            - channel: '<slack-channel>'             ## slack channel
              send_resolved: true
              username: 'Alertmanager'
              title: |
                  [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }}
              text: |
                  {{ range .Alerts -}}
                  {{- "\n" -}}
                  *Alert:* {{ .Annotations.summary }}
                  {{ if .Labels.severity }}*Severity:* `{{ .Labels.severity }}`{{ end }}
                  *Cluster:* {{ .Labels.cluster }}
                  *Details:*
                  {{ .Annotations.description }}
                  {{ end }}

...

Code Block
languageyaml
# deploy-as-code/helm/environments/egov-demo-secrets.yaml
# Alertmanager config that sends warning/critical alerts by email.
cluster-configs:
  secrets:
    alertmanager:
      config:
        global:
          resolve_timeout: 5m
        # NOTE(review): no default `receiver` is set on this root route —
        # confirm the chart supplies one, since Alertmanager requires it.
        route:
          group_by: ['alertname']
          group_wait: 30s
          group_interval: 5m
          repeat_interval: 10m
          routes:
          - receiver: email-notification
            # match_re, not match: the value is a regex alternation, and
            # `match` only does exact string equality.
            match_re:
              severity: "warning|critical"
            continue: true
        receivers:
        - name: email-notification
          email_configs:
            - to: '<recepient-email-address>'             ## recipient's email id
              from: '<sender-email-address>'              ## sender's email id
              smarthost: 'smtp.gmail.com:587'             ## update the SMTP
              auth_username: '<sender-email-address>'     ## configuration as
              auth_password: '<auth-token>'               ## per the provider
              send_resolved: true
              headers:
                subject: |
                  [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.cluster }} - {{ .CommonLabels.alertname }}
              # Inside `range .Alerts.Firing` the dot is a single alert, so
              # per-alert fields (.Labels, .Annotations) are used there.
              # Newlines in the description become <br>; safeHtml keeps the
              # inserted tags from being escaped, which also means any HTML
              # in the description annotation is rendered as-is.
              html: |
                <html>
                <head>
                <title>Alert!</title>
                </head>
                <body>
                {{ range .Alerts.Firing }}
                <ul>
                <li> <b>Alert Name:</b> {{ .Labels.alertname }} </li>
                <li> <b>Severity:</b> {{ if eq .Labels.severity "critical" }}<b style="color:red;">CRITICAL</b>{{ else if eq .Labels.severity "warning" }}<b style="color:orange;">WARNING</b>{{ else }}<b>{{ .Labels.severity | toUpper }}</b>{{ end }} </li>
                <li> <b>Summary:-</b> {{ .Annotations.summary }} </li>
                <li> <b>Cluster:-</b> {{ .Labels.cluster }} </li>
                <li> <b>Details:</b>
                  <p style="margin-left: 20px;"> {{ reReplaceAll "\n" "<br>" .Annotations.description | safeHtml }} </p>
                </li>
                </ul><br>
                {{ end }}
                </body></html>

...