安装 Prometheus

Prometheus 配置文件

# Standalone Prometheus server configuration (prometheus.yml).

# Global settings; an individual scrape job may override them.
global:
  # How often targets are scraped. Set explicitly to 15s here
  # (Prometheus' built-in default is 1m).
  scrape_interval: 15s
  # How often rules are evaluated. Prometheus uses rules to derive new
  # time series and to raise alerts.
  evaluation_interval: 15s
  # Labels attached to every time series and alert when this server
  # communicates with external systems.
  external_labels:
    monitor: 'codelab-monitor'

# Files that hold the alerting rules.
rule_files:
  - '/etc/prometheus/alert.rules'

# Endpoints to scrape.
scrape_configs:
  # Job names must be unique. This job scrapes Prometheus' own metrics.
  - job_name: 'prometheus'
    # Overrides the global scrape_interval for this job.
    scrape_interval: 5s
    # Statically configured targets.
    static_configs:
      - targets: ['127.0.0.1:9090']

  # Job names must be unique. This job scrapes host metrics and requires
  # node_exporter to be installed on that host.
  - job_name: 'node'
    scrape_interval: 10s
    static_configs:
      # Endpoint of the host's node_exporter.
      - targets: ['10.0.2.15:9100']

Alert 配置文件

# Alerting rules in the YAML rule-group format used by Prometheus 2.0+.
# The legacy "ALERT <name> IF <expr> ..." syntax is only understood by
# Prometheus 1.x and makes a 2.x server fail to load the rule file.
groups:
  - name: instance-availability
    rules:
      # Alert name.
      - alert: InstanceDown
        # Condition that triggers the alert.
        expr: up == 0
        # The condition must hold for 5m before the alert fires.
        for: 5m
        # Labels attached to the alert.
        labels:
          severity: critical
        # Extra informational fields; they are not used to identify the alert.
        annotations:
          summary: 'Instance {{ $labels.instance }} down'
          description: '{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.'

创建 Namespace(便于管理)

# Dedicated Namespace that groups every Prometheus-related object.
kind: Namespace
apiVersion: v1
metadata:
  labels:
    name: kube-prometheus
  name: kube-prometheus

创建 ConfigMap(管理 Prometheus 配置文件)

# ConfigMap carrying the Prometheus configuration file as the key
# "prometheus.yml".
kind: ConfigMap
apiVersion: v1
metadata:
  # Lives in the Namespace created for Prometheus.
  namespace: kube-prometheus
  name: prometheus-config
data:
  # The literal block below is the file content; it is kept verbatim.
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 15s
    scrape_configs:
    - job_name: 'prometheus'
      static_configs:
      - targets: ['localhost:9090']

创建 RBAC

# ServiceAccount named "prometheus".
kind: ServiceAccount
apiVersion: v1
metadata:
  # Lives in the Namespace created for Prometheus.
  namespace: kube-prometheus
  name: prometheus

---
# Cluster-wide, read-only permissions under the role name "prometheus".
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
  # Core API group: get/list/watch on nodes, services, endpoints, pods
  # and the nodes/proxy subresource.
  - apiGroups: [""]
    resources: [nodes, services, endpoints, pods, nodes/proxy]
    verbs: [get, list, watch]
  # Core API group: get only on configmaps and the nodes/metrics subresource.
  - apiGroups: [""]
    resources: [configmaps, nodes/metrics]
    verbs: [get]
  # Non-resource URL: get on /metrics.
  - nonResourceURLs: [/metrics]
    verbs: [get]

---
# Binds the "prometheus" ClusterRole to the "prometheus" ServiceAccount.
# Uses the stable rbac.authorization.k8s.io/v1 API: the v1beta1 version is
# no longer served as of Kubernetes 1.22.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
  - kind: ServiceAccount
    name: prometheus
    # The ServiceAccount lives in the Namespace created for Prometheus.
    namespace: kube-prometheus

创建 PVC

# PersistentVolumeClaim named "prometheus"; this setup uses NFS-Client
# as the storage backend.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: prometheus
  namespace: kube-prometheus
spec:
  # Selects the StorageClass. This field replaces the deprecated
  # "volume.beta.kubernetes.io/storage-class" annotation.
  storageClassName: managed-nfs-storage
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      # "M" is the decimal suffix (10^6 bytes); use "Mi" for mebibytes.
      storage: 200M

创建 Prometheus Server

# Deployment that runs the Prometheus server container in kube-prometheus.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-server
  namespace: kube-prometheus
spec:
  selector:
    matchLabels:
      app: prometheus-server
  template:
    metadata:
      labels:
        app: prometheus-server
    spec:
      serviceAccountName: prometheus
      containers:
        - name: prometheus-server
          # NOTE(review): no tag is given, so the "latest" image is pulled;
          # consider pinning a specific version for reproducible rollouts.
          image: prom/prometheus
          command:
            - "/bin/prometheus"
          args:
            - "--config.file=/etc/prometheus/prometheus.yml"
            # Directory where the TSDB stores its data.
            - "--storage.tsdb.path=/prometheus"
            # How long the TSDB keeps data. The older
            # --storage.tsdb.retention flag is deprecated in favour of
            # --storage.tsdb.retention.time.
            - "--storage.tsdb.retention.time=24h"
            # NOTE(review): the admin API can delete data and becomes
            # reachable by anyone who can reach the server port — confirm
            # that this is intended.
            - "--web.enable-admin-api"
            # Enables hot reload: an HTTP POST to localhost:9090/-/reload
            # makes Prometheus re-read its configuration file.
            - "--web.enable-lifecycle"
          resources:
            limits:
              memory: "128Mi"
              cpu: "50m"
          ports:
            - containerPort: 9090
              name: prome-port
          volumeMounts:
            # TSDB data, stored in the "prometheus01" sub-directory of the
            # claimed volume.
            - mountPath: "/prometheus"
              subPath: prometheus01
              name: data
            # Configuration files taken from the ConfigMap.
            - mountPath: "/etc/prometheus"
              name: config
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: prometheus
        - name: config
          configMap:
            name: prometheus-config

创建 Service

# Service that exposes the pods labelled app=prometheus-server.
kind: Service
apiVersion: v1
metadata:
  namespace: kube-prometheus
  name: prome-svc
spec:
  # Exposed as a NodePort. No nodePort value is set in this manifest.
  type: NodePort
  selector:
    app: prometheus-server
  ports:
    # Service port 9090 forwards to the container port named "prome-port".
    - targetPort: prome-port
      port: 9090

应用上面创建的所有 YAML 文件

# Create every manifest found in the current directory.
# NOTE(review): the namespaced objects need the kube-prometheus Namespace to
# exist first — confirm the order in which the files are processed.
kubectl create --filename=./

通过 http://<节点 IP>:<NodePort> 访问 Prometheus WebUI（NodePort 可通过 kubectl get svc -n kube-prometheus 查看）。

Last updated

Was this helpful?