说明

不对节点功能以及架构进行说明
未把Remote Storage隔离出来,后续优化
采用Ansible部署,可参考之前文章
部分配置采用默认,做案例时做修改
各节点之间需保持时间同步(如使用 chrony 或 ntpd)

一. 初始环境

主机名 IP 部署服务
vm1 192.168.0.107 consul_1.2.4、alertmanager-0.22.2、node_exporter-1.1.2、prometheus-2.27.1、自定义metrics
vm2 192.168.0.106 consul_1.2.4、node_exporter-1.1.2、自定义metrics
vm3 192.168.0.108 consul_1.2.4、node_exporter-1.1.2、自定义metrics

二. 实现拓扑图

三. node_exporter

解压即可启动
日志路径还需分析,默认输出到 /var/log/messages

# systemd unit for node_exporter: started after network.target, listening on
# port 9321 with debug-level logging.
[Unit]
Description=node_exporter
After=network.target

[Service]
User=root
Group=root
ExecStart=/opt/app/node_exporter/node_exporter --web.listen-address=:9321 --log.level=debug

[Install]
WantedBy=multi-user.target

四. Consul

4.1 consul配置

ip等参数自行修改;下方配置中的 // 注释仅作说明,JSON 不支持注释,实际使用时需删除

{
   "bind_addr": "192.168.0.107",
   "client_addr": "0.0.0.0",
   "datacenter": "wtc-consul",
   "data_dir": "/opt/app/consul/data",
   "log_level": "INFO",
   "log_file": "/opt/app/consul/logs/consul.log",
   "log_rotate_duration": "24h",
   "enable_syslog": false,
   "enable_debug": true,
   "node_name": "consul-vm1",
   "server": true,
   "ui": true,
   "bootstrap_expect": 3, // 3节点,leader 随机选取
   "leave_on_terminate": false,
   "skip_leave_on_interrupt": true,
   "encrypt_verify_incoming":false,
   "encrypt_verify_outgoing":false,
   "rejoin_after_leave": true,
   "retry_join": [ // 不需要写本机IP
      "192.168.0.106",
      "192.168.0.108" 
   ],
   "ports": {
   "http": 8500, // web listen and client register address
   "dns": 8600,
   "serf_lan":8301,
   "serf_wan":8302,
   "server":8300
  }
}

4.2 consul_agent.json配置

services 多个节点同时注册,新的node_exporter新增内容即可

注意:service[-1]末尾不要携带逗号
{
  "services":[
    {
      "Id": "node_exporter_vm1",
      "Name": "vm1",
      "Tags": [
        "node_exporter",
        "vmware_vm1"
      ],
      "Address": "192.168.0.107",
      "Port": 9321,
      "Meta": {
        "service": "node_exporter",
        "use": "monitor_env",
        "idc": "beijing"
      },
      "Check": [{
        "HTTP": "http://192.168.0.107:9321/metrics",
        "Interval": "10s",
        "timeout": "5s"
      }]
    }
  ]
}

4.3 consul 启动配置

# systemd unit that runs the Consul agent with -config-dir=/opt/app/consul/config.
[Unit]
Description="consul-service"
Requires=network-online.target
After=network-online.target

[Service]
User=root
Group=root
ExecStart=/usr/local/bin/consul agent -config-dir=/opt/app/consul/config
# Hot-reload the configuration. systemd has no trailing-comment syntax, so this
# note must stay on its own line; appended text would be passed to
# `consul reload` as extra arguments.
ExecReload=/usr/local/bin/consul reload
KillMode=process
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
http://consul_ip:8500

五. Alertmanager

默认配置
数据存储目录需额外创建,并保证运行用户(prometheus)对该目录有读写权限

# systemd unit for Alertmanager, started after network.target.
[Unit]
Description=Alertmanager
After=network.target

[Service]
Type=simple
# Runs as the "prometheus" account, unlike the other units in this guide (root).
User=prometheus
# --config.file points at alertmanager.yml; --storage.path is the data directory
# under /opt/app/alertmanager/data.
ExecStart=/opt/app/alertmanager/alertmanager --config.file=/opt/app/alertmanager/alertmanager.yml --storage.path=/opt/app/alertmanager/data
Restart=on-failure

[Install]
WantedBy=multi-user.target
http://alertmanager_ip:9093

六. Prometheus

默认配置

# systemd unit for the Prometheus server installed under /opt/app/prometheus.
[Unit]
Description=Prometheus Server
Documentation=https://prometheus.io/docs/introduction/overview/
After=network.target

[Service]
User=root
Group=root
Type=simple
Restart=on-failure
WorkingDirectory=/opt/app/prometheus
# --storage.tsdb.retention.time replaces the deprecated --storage.tsdb.retention
# flag; the retention period itself (1500d) is unchanged.
ExecStart=/opt/app/prometheus/prometheus --config.file=/opt/app/prometheus/prometheus.yml --log.level=info --storage.tsdb.retention.time=1500d

[Install]
WantedBy=multi-user.target
http://prometheus_ip:9090

Copyright © TianCiwang 2021 all rights reserved, powered by Gitbook 修改时间: 2021-10-26 11:45:44

results matching ""

    No results matching ""

    results matching ""

      No results matching ""