夜莺监控插件(v3)
1、判断https证书是否过期的插件,如下:
#!/bin/bash
# Nightingale (n9e) plugin: report how many days remain before an HTTPS
# certificate expires, as a single JSON datapoint on stdout.

# Path of the certificate (.cer) file to inspect.
CER_FILE=/usr/local/n9e/plugin/cert/blog.ywdevops.cn.cer
# Endpoint: first inet address of the interface that carries the default route.
ip=$(ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|head -n 1)

# Compute the whole number of days until the certificate expires.
# Sets the global variable "value"; returns non-zero when the expiry date
# cannot be read from the certificate or cannot be parsed by date(1).
last_time () {
    local end_date end_epoch now_epoch
    # "-enddate" prints "notAfter=<date>"; keep everything after the "=" so
    # the time of day and the time zone reach date(1) intact.
    end_date=$(openssl x509 -in "${CER_FILE}" -noout -enddate) || return 1
    end_date=${end_date#*=}
    end_epoch=$(date +%s -d "${end_date}") || return 1
    now_epoch=$(date +%s)
    # Seconds -> days (integer division).
    value=$(( (end_epoch - now_epoch) / (60*60*24) ))
}

# Without a value the JSON below would be malformed, so emit nothing at all.
if ! last_time; then
    echo "failed to read the expiry date from ${CER_FILE}" >&2
    exit 1
fi

# NOTE(review): the tag and metric names below mention a different
# certificate name than CER_FILE -- confirm which one is intended.
echo '[
{
"endpoint": "'${ip}'",
"timestamp": '$(date +%s)',
"tags": "cert=ghs.sfair.com.cer",
"metric": "ghs.cer",
"value": '${value}'
}
]'
2、nginx指标采集插件(需要提前安装vts模块)
#!/usr/bin/python
#-*- coding:utf-8 -*- #指定编码
'''
@此脚本用来获取nginx的监控状态信息,json界面的各个参数说明如下:
servermain:
hostname: 主机名
nginxVersion: nginx版本
loadMsec: 以毫秒为单位的处理时间
nowMsec: 以毫秒为单位的当前时间
connections:
active: 当前活动的客户端连接数
reading: 读取客户端连接的总数
writing: 写入客户端连接的总数
waiting: 正在等待的客户端连接总数
accepted: 接受的客户端连接总数
handled: 已处理的客户端连接总数
requests: 请求的客户端连接总数
sharedZones:
name: 配置中指定的共享内存名称
maxSize: 配置中指定的共享内存的最大大小的限制
usedSize: 共享内存的当前大小
usedNode: 共享内存中当前使用的节点数
serverZones:
requestCounter: 从客户端接收的客户端请求总数
inBytes: 从客户端接收的总字节数
outBytes: 发送到客户端的总字节数
responses:
1xx,2xx,3xx,4xx,5xx: 表示响应状态码
miss: 未命中的缓存数
bypass: 绕过缓存旁路数
expired: 过期的缓存数
stale: 生效缓存的数量
updating: 缓存更新的次数
revalidated:重新验证的缓存数
hit: 缓存命中数
scarce: 未达缓存要求的请求次数
requestMsecCounter: 累积的请求处理时间(毫秒)
requestMsec: 请求处理时间的平均值(毫秒)
upstreamZones:
server: server地址
requestCounter: 转发到此服务器的客户端连接总数
inBytes: 从该服务器接收的总字节数
outBytes:发送到此服务器的总字节数
responses:
1xx,2xx,3xx,4xx,5xx: 表示响应状态码
requestMsecCounter: 包括upstream在内的累计请求处理时间数(毫秒)
requestMsec:
time: 请求处理时间
msecs: 包括upstream在内的请求处理时间
responseMsecCounter: 仅upstream响应处理时间累加值(毫秒)
responseMsecs: 仅upstream响应处理时间平均值(毫秒)
weight: server的当前weight值
maxFails: server的当前max_fails设置
failTimeout: server的当前的fail_timeout设置
backup: server的当前backup设置
down: 服务器的当前down设置
'''
import requests
import json
import time
import commands
class Check_Nginx:
    """Keep the collected nginx datapoints and print them as JSON on creation."""

    def __init__(self, result):
        # The list is serialised to a JSON string before printing because
        # Nightingale does not recognise the raw list otherwise.
        payload = json.dumps(result)
        self.result = result
        print(payload)
def _metric(endpoint, timestamp, name, value, tags=''):
    """Build one Nightingale datapoint.

    Args:
        endpoint: Value of the 'endpoint' field (the reporting host's IP).
        timestamp: Collection time as a Unix timestamp in seconds.
        name: Metric name.
        value: Metric value.
        tags: Tag string such as 'upstreamip=10.0.0.1'; empty for no tags.

    Returns:
        dict with the keys 'endpoint', 'tags', 'timestamp', 'metric', 'value'.
    """
    return {
        'endpoint': endpoint,
        'tags': tags,
        'timestamp': timestamp,
        'metric': name,
        'value': value,
    }


def main(status_url='http://10.88.10.35/vhost_status/format/json',
         server_zone='localhost', upstream_zone='schedule', timeout=5):
    """Collect nginx VTS status metrics and print them as Nightingale JSON.

    Args:
        status_url: URL of the VTS status page in JSON format.
        server_zone: Key inside 'serverZones' whose counters are reported.
        upstream_zone: Key inside 'upstreamZones' whose servers are reported.
            A missing upstream group is skipped instead of aborting the run.
        timeout: Seconds to wait for the status page before giving up.

    Raises:
        requests.RequestException: the status page could not be fetched or
            answered with an HTTP error status.
        KeyError: a required section is missing from the status document.
    """
    ip = commands.getoutput('''ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|head -n 1''')
    timestamp = int(time.time())

    # A timeout keeps a hung nginx from blocking the collector forever, and
    # an HTTP error page must not be parsed as if it were the status JSON.
    response = requests.get(status_url, timeout=timeout)
    response.raise_for_status()
    status = json.loads(response.text)

    result = []

    # Uptime = nowMsec - loadMsec (milliseconds), reported in hours.
    uptime_hours = round(
        (float(status['nowMsec']) - float(status['loadMsec'])) / 3600000, 2)
    result.append(_metric(ip, timestamp, 'nginx.uptime', uptime_hours))

    # Connection counters from the "connections" section.
    connections = status['connections']
    for metric_name, key in (
            ('nginx.accept', 'accepted'),
            ('nginx.handled', 'handled'),
            ('nginx.requests', 'requests'),
            ('nginx.active', 'active'),
            ('nginx.reading', 'reading'),
            ('nginx.writing', 'writing'),
            ('nginx.waiting', 'waiting'),
    ):
        result.append(_metric(ip, timestamp, metric_name, connections.get(key)))

    # Shared-memory usage from the "sharedZones" section.
    shared = status['sharedZones']
    for metric_name, key in (
            ('nginx.sharememory.maxsize', 'maxSize'),
            ('nginx.sharememory.usedsize', 'usedSize'),
            ('nginx.sharememory.usedNode', 'usedNode'),
    ):
        result.append(_metric(ip, timestamp, metric_name, shared.get(key)))

    # Request, traffic and response-code counters of the selected server zone.
    zone = status['serverZones'][server_zone]
    for metric_name, key in (
            ('nginx.server.requestCounter', 'requestCounter'),
            ('nginx.server.inbytes', 'inBytes'),
            ('nginx.server.outbytes', 'outBytes'),
    ):
        result.append(_metric(ip, timestamp, metric_name, zone.get(key)))
    zone_responses = zone['responses']
    for code in ('1xx', '2xx', '3xx', '4xx', '5xx'):
        result.append(_metric(ip, timestamp, 'nginx.server.%s' % code,
                              zone_responses.get(code)))

    # One set of datapoints per server of the selected upstream group.
    for upstream in status['upstreamZones'].get(upstream_zone) or []:
        server = upstream.get('server')
        # Only the address part is used as tag; the port is dropped.
        upstream_ip = server.split(':')[0] if server is not None else None
        tags = 'upstreamip=%s' % upstream_ip
        result.append(_metric(ip, timestamp, 'nginx.upstream.ip',
                              1 if upstream_ip is not None else 0, tags))
        for metric_name, key in (
                ('nginx.upstream.requestCounter', 'requestCounter'),
                ('nginx.upstream.inBytes', 'inBytes'),
                ('nginx.upstream.outBytes', 'outBytes'),
        ):
            result.append(_metric(ip, timestamp, metric_name,
                                  upstream.get(key), tags))
        upstream_responses = upstream['responses']
        for code in ('1xx', '2xx', '3xx', '4xx', '5xx'):
            result.append(_metric(ip, timestamp, 'nginx.upstream.%s' % code,
                                  upstream_responses.get(code), tags))
        result.append(_metric(ip, timestamp, 'nginx.upstream.responsetime',
                              upstream.get('responseMsec'), tags))

    # Check_Nginx prints the list as JSON when it is constructed.
    Check_Nginx(result)


if __name__ == '__main__':
    main()
3、oracle指标采集插件,如下:
#!/usr/bin/python3
#-*- coding:utf-8 -*-
"""
@此脚本的功能为监控oracle指标,监控oracle地址为
192.168.3.200
@author:gongguan
@date: 20210624
"""
import cx_Oracle
import subprocess
import time
import json
def get_value():
    """Report whether table t7107 received data recently, as Nightingale JSON.

    Queries the difference between ``sysdate`` and the newest ``insert_date``
    in ``t7107`` (Oracle date subtraction yields days).  The metric value is
    1 when that difference, rounded to two decimals, is at most 1.05 and 0
    otherwise -- including when the query returns NULL (no rows in the
    table).  The datapoint list is printed as a JSON string on stdout.
    """
    ip = subprocess.getoutput('''ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|head -n 1''')
    timestamp = int(time.time())

    # NOTE(review): credentials are hard-coded in the script; consider
    # loading them from a protected configuration source.
    db = cx_Oracle.connect('sffoc','sfhxfoc#3','192.168.3.200:1521/foc')
    try:
        cursor = db.cursor()
        cursor.execute("select sysdate - (select max(insert_date) from t7107 ) from dual")
        age_days = cursor.fetchall()[0][0]
    finally:
        # Close the connection even when the query fails.
        db.close()

    # NULL means there is no row to measure against; report it as stale
    # instead of crashing in round().
    if age_days is None or round(age_days, 2) > 1.05:
        value = 0
    else:
        value = 1

    result = [{
        'endpoint': ip,
        'timestamp': timestamp,
        'tags': 'oracle_ip=192.168.3.200',
        'metric': 'oracle_value',
        'value': value,
    }]
    print(json.dumps(result))


if __name__ == '__main__':
    get_value()
4、检测磁盘是否挂载插件,如下:
#!/usr/bin/python3
#-*- coding:utf-8 -*-
import os
import platform
import json
import re
import time
import requests
import importlib,sys
import subprocess
# Reload the sys module; this accompanies the disabled setdefaultencoding
# call on the next line.
importlib.reload(sys)
#sys.setdefaultencoding("utf-8")
# Operating system name as returned by platform.system(); do_cmd() branches on it.
systype = platform.system()
# Endpoint used for the Linux datapoint: the output of "hostname -i".
ip = subprocess.getoutput("hostname -i")
# Collection time as a Unix timestamp in whole seconds, taken once at import.
timestamp = int(time.time())
def do_cmd():
    """Check whether the expected network disk is mounted and report it.

    On Windows the output of ``net use`` is scanned for a connection whose
    first column is ``ok``/``OK`` and whose line matches the expected share
    pattern; the datapoint is pushed to the transfer API over HTTP.
    On Linux ``df -h`` is searched for ``192.168.10.4`` and the datapoint is
    printed as a JSON string on stdout.

    Returns:
        list: the single-element datapoint list on Windows and Linux;
        None on any other platform.
    """
    if systype == "Windows":
        with os.popen('C:\\WINDOWS\\system32\\WindowsPowerShell\\v1.0\\powershell.exe net use') as pipe:
            # The first three output lines are skipped.
            disk_list = [line.strip() for line in pipe.readlines()][3:]

        # Pattern of the remote share that is expected to be connected.
        share_pattern = re.compile(r"\w{3}\d{1}\.\w{5}\.\w{3}\\\w{3}")
        value = 0
        for disk_info in disk_list:
            fields = disk_info.split()
            # The status is the first whitespace-separated column.
            if fields and fields[0] in ("ok", "OK") and share_pattern.search(disk_info):
                value = 1
                break

        result = [{
            'endpoint': '192.168.18.93',
            'tags': "",
            'timestamp': timestamp,
            'metric': 'disk.mount.efb',
            'value': value,
            'step': 5,
            'extra': "",
        }]
        url = "http://192.168.22.183:8008/api/transfer/push"
        # A timeout prevents the plugin from hanging on an unreachable API.
        r = requests.post(url=url, data=json.dumps(result), timeout=10)
        print(r)
        return result
    elif systype == "Linux":
        # grep prints nothing when no filesystem from that address is mounted.
        mounted = subprocess.getoutput("df -h |grep '192.168.10.4'")
        value = 0 if mounted == '' else 1
        linux_result = [{
            'endpoint': ip,
            'tags': '',
            'timestamp': timestamp,
            'metric': 'mount104',
            'value': value,
        }]
        print(json.dumps(linux_result))
        return linux_result
    return None


if __name__ == "__main__":
    do_cmd()
5、判断文件夹时间插件,如下:
#!/usr/bin/python3
# -*- coding:utf-8 -*-
import time
import datetime
import math
import json
import subprocess #python执行shell用此模块,python2用commands模块
def check_time():
'''
@本脚本主要功能为获取两个文件夹时间与系统时间做对比
如果时间差大于40分钟,则告警
@author: gongguan
@time: 20210529
'''
result = []
value = 0
value1 = 0
ip = subprocess.getoutput('''ifconfig `route|grep '^default'|awk '{print $NF}'`|grep inet|awk '{print $2}'|head -n 1''')
timestamp = int(time.time())
date_time = datetime.datetime.now() #获取当前系统时间,注意此时的类型为data_time日期类型
sys_time = str(date_time).split(' ')[1] #如果要用split切割,需要先转换为字符串才能切割
sys_hour = sys_time.split(':')[0] #获取系统的小时
sys_min = sys_time.split(':')[1] #获取系统的分钟数
one_modify_time = subprocess.getoutput('''stat one | grep Modify | awk '{print $3}' |awk -F "." '{print $1}' ''')
one_hour = subprocess.getoutput(''' stat one | grep Modify | awk '{print $3}' |awk -F "." '{print $1}' |awk -F ':' '{print $1}' ''')
one_min=subprocess.getoutput(''' stat one | grep Modify | awk '{print $3}' |awk -F "." '{print $1}' |awk -F ':' '{print $2}' ''')
two_hour = subprocess.getoutput(''' stat one | grep Modify | awk '{print $3}' |awk -F "." '{print $1}' |awk -F ':' '{print $1}' ''')
two_min = subprocess.getoutput(''' stat one | grep Modify | awk '{print $3}' |awk -F "." '{print $1}' |awk -F ':' '{print $2}' ''')
#接下来将获取到的文件夹时间与系统的时间做对比并执行逻辑
if (abs(int(sys_hour) - int(one_hour)) == 23 ) and ((60 - int(sys_min) + int(one_min)) <= 40):
value = 1
elif (abs(int(sys_hour) - int(one_hour)) == 0 ) and ( abs(int(sys_min) - int(one_min)) <= 40 ):
value = 1
else:
value = 0
one_time = {
'endpoint': ip,
'tags': '',
'timestamp': timestamp,
'metric': 'one.time',
'value': value
}
result.append(one_time)
#接下来获取prog文件夹时间,与系统时间做对比并执行逻辑
if (abs(int(sys_hour) - int(two_hour)) == 23 ) and ((60 - int(sys_min) + int(two_min)) <= 40):
value1 = 1
elif (abs(int(sys_hour) - int(two_hour)) == 0 ) and ( abs(int(sys_min) - int(two_min)) <= 40 ):
value1 = 1
else:
value1 = 0
two_time = {
'endpoint': ip,
'tags': '',
'timestamp': timestamp,
'metric': 'two.time',
'value': value1
}
result.append(two_time)
check_result = json.dumps(result)
print(check_result)
if __name__ == '__main__':
check_time()
6、夜莺url二次处理插件,如下:
#!/usr/bin/python3.6
#coding: utf-8
"""
@Created on 2021-04-21
@作用:用于夜莺URL告警接口二次处理
@author: gongguan
"""
from flask import Flask, request, jsonify,redirect
import json
import ast
import pymysql
import sys
import requests
import urllib3
# urllib3 connection pool; not referenced by the functions visible in this script.
http = urllib3.PoolManager()
# Flask application object on which the /index/ route is registered.
app = Flask(__name__)
# NOTE(review): debug mode is enabled while the server is started on 0.0.0.0
# at the bottom of the script -- confirm this is not used in production,
# since Flask's debug mode enables an interactive debugger.
app.debug = True
#app.config['JSON_AS_ASCII'] = False
@app.route("/index/",methods=["GET","POST"])
def post_http():
    """Receive an alert pushed by Nightingale and trigger the follow-up handling.

    The POST body is parsed as JSON, its Python representation is saved to
    ``alert.json`` (read back by the helper functions through
    ``ast.literal_eval``), and ``post_alert()`` is invoked.

    Returns:
        str: the stored text for POST requests; a notice for other methods.
    """
    if request.method != 'POST':
        return "只接受post请求"

    get_data = json.loads(request.get_data())  # request body -> dict
    str_data = str(get_data)
    # Persist the alert so the helper functions can read it from disk.
    with open("alert.json","w",encoding='utf-8') as file:
        file.write(str_data)
    post_alert()
    # Only the body is returned: Flask interprets the second element of a
    # returned tuple as the HTTP status, so pairing the body with the
    # "success" marker produced an invalid status line.
    return str_data
# Build an alert from the stored Nightingale payload and forward it:
# the "dictfile" template is filled in, pushed to the nacos config API, and
# the current value is written to a node_exporter .prom file.
# NOTE(review): indentation was lost in this copy of the script. The
# statements from "url = ..." through the requests.post call presumably belong
# to the "if" (they use names defined there); whether the .prom write and the
# return are also inside it cannot be determined from here -- confirm.
def post_alert():
# varr is assigned but not used in this function.
varr = "100"
result_bz,result_ip = select_mysql()
result_name = get_name()
# result_history and result_stragety are fetched but not used below.
result_history = get_history()
result_stragety = get_stragety()
result_url = get_url()
result_code,result_value = status_code()
if int(result_value) == 0:
url = "http://127.0.0.1:8848/nacos/v1/cs/configs"
file = open("dictfile","r",encoding='utf-8') # dictfile is the base template
dictt = ast.literal_eval(file.read()) # convert the text that was read into a dict
file.close()
listname = ast.literal_eval(dictt.get('content')) # parse the template's "content" so its values can be modified
listname[0].get('labels')['alertname'] = result_name # overwrite the template value
listname[0].get('labels')['instance'] = result_ip # overwrite the template value
listname[0].get('labels')['value'] = result_value
listname[0].get('labels')['target'] = result_code
listname[0].get('annotations')['summary'] = result_bz
listname[0].get('annotations')['description'] = result_url
listname[0].get('annotations')['recovery_value'] = 200 # everything else is variable; only this one is a constant
dictt["content"] = json.dumps(listname) # serialise to JSON and store it back in the dict
requests.post(url=url ,params=dictt) # push the modified content to nacos
# Opened with 'r+': the file must already exist and is overwritten from the
# start without being truncated.
node_file = open('/usr/local/node_exporter/key/key.prom','r+',encoding='utf-8')
node_file.write('result_value')
node_file.write(' ')
node_file.write(result_value)
node_file.close()
return "success"
def public():
    """Load the alert most recently saved in ``alert.json``.

    The file holds the Python representation of the alert payload, so it is
    parsed with ``ast.literal_eval`` rather than as JSON.

    Returns:
        The parsed Python literal (a dict when written by post_http()).
    """
    # The context manager closes the file even if reading or parsing fails.
    with open("alert.json","r",encoding='utf-8') as file:
        return ast.literal_eval(file.read())
def get_ip():
    """Return the part before the first ':' of the first alert detail's "host" tag."""
    alert = public()
    # 'detail' is a list; only its first entry is inspected.
    first_detail = alert.get('detail')[0]
    host = first_detail.get('tags').get('host')
    address, _, _ = host.partition(':')
    return address
def get_name():
    """Return the strategy name (the "sname" field) of the stored alert."""
    alert = public()
    return alert.get('sname')
def get_history():
    """Return the alert detail id (the "id" field) of the stored alert."""
    alert = public()
    return alert.get('id')
def get_stragety():
    """Return the "sid" field of the stored alert."""
    alert = public()
    return alert.get('sid')
def get_url():
    """Look up the probe URL that belongs to the alerting host.

    The "host" tag of the first alert detail is searched for in
    ``/home/n9e/etc/probe.yml``; for the first line containing it, the text
    after the line's first '-' is returned unchanged.

    Returns:
        str or None: the matched text, or None when no line matches.
    """
    alert = public()
    ip_port = alert.get('detail')[0].get('tags').get('host')
    with open("/home/n9e/etc/probe.yml",'r',encoding='utf-8') as f:
        for line in f:
            if ip_port not in line:
                continue
            # Split at the first '-' only, so hyphens inside the URL itself
            # do not cut it short.
            _, dash, add_url = line.partition('-')
            if dash:
                return add_url
    # No matching entry: return None instead of failing on an unbound name.
    return None
def status_code():
    """Split the stored alert's "value" field at ':' and return its first two parts.

    Returns:
        tuple: (metric name, current value), both as strings.
    """
    raw_value = public().get('value')
    fields = raw_value.split(':')
    return fields[0], fields[1]
def select_mysql():
    """Fetch the note stored for the alerting host in the n9e_rdb database.

    Returns:
        tuple: (note, ip) where *ip* comes from get_ip() and *note* is the
        ``note`` column of the matching ``resource`` row, or "没有数据" when
        there is no such row or its note is NULL.
    """
    getip = get_ip()
    # NOTE(review): root credentials are hard-coded in the script; consider
    # a dedicated read-only account loaded from configuration.
    conn = pymysql.connect(host="10.88.22.188",user="root",password="sf@PWD6950",database="n9e_rdb",charset='utf8')
    try:
        cursor = conn.cursor()
        try:
            # Parameterised query: the driver escapes the ident value.
            cursor.execute("select note from resource where ident=%s", [getip])
            data = cursor.fetchone()
        finally:
            cursor.close()
    finally:
        # Release the connection on every path.
        conn.close()

    if data is None or data[0] is None:
        bz = "没有数据"
    else:
        bz = ''.join(data)
    return bz, getip
# Script entry point: start the Flask server when run directly.
if __name__ == '__main__':
app.run(host='0.0.0.0',port=5001)
# The listen address and port are specified here
#dictfile文件内容如下:
{
"dataId": "etc.prometheus",
"group": "DEFAULT_GROUP",
"content": """[{ \
"labels": { \
"alertname": "workd", \
"instance": "910", \
"value": "1", \
"target": "code" \
}, \
"annotations": { \
"summary": "15.88.22.200" , \
"description": "系统已经挂了,请检查!!", \
"recovery_value": "200" \
}
}]"""
}


