| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115 |
- # coding:utf-8
- '''
- Crash alert monitoring script: when a supervisor-managed program terminates abnormally, automatically push an alert message to redis.
- author : zhaiyifei
- time : 2021-12-14
- '''
- import os
- import sys
- import json
- import datetime
- from supervisor import childutils
# Resolve the directory three levels above this file and make sure it is on
# sys.path (presumably the project root, so the absolute ``monitor`` package
# import that follows can be resolved - confirm against the repo layout).
local_path = os.path.abspath(
    os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        os.pardir,
        os.pardir,
    )
)
if local_path not in sys.path:
    sys.path.append(local_path)
- from monitor.utils.comm_tools import get_redis_connection
def usage(exitstatus=255):
    """Exit the script by raising ``SystemExit``.

    :param exitstatus: exit code carried by the ``SystemExit`` exception
        (255 by default).
    :raises SystemExit: always.
    """
    raise SystemExit(exitstatus)
class CrashExceptionWarning:
    """Supervisor event listener that pushes crash alerts onto a Redis list.

    The listener reads supervisor events from ``stdin``, acknowledges each
    one on ``stdout`` and, for unexpected exits and the RUNNING / FATAL
    events that follow them, pushes a JSON message onto the Redis list
    ``yf_dingding``.

    ``event_dict`` maps a process name to the timestamp string
    (``'%Y-%m-%d %H:%M:%S'``) of its last unexpected exit; the entry is
    removed again once the matching RUNNING or FATAL event has been reported.
    """

    # Timestamp layout produced by ``_get_now`` and parsed by ``_clean_event``.
    _TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
    # Age (in seconds) after which a remembered crash is considered stale.
    _EVENT_TTL_SECONDS = 30
    # Redis list that receives the JSON-encoded alerts.
    _REDIS_KEY = 'yf_dingding'
    # Events for this process name are acknowledged and otherwise ignored
    # (presumably the listener's own supervisor program name - confirm).
    _IGNORED_PROCESS = 'crasheding'

    def __init__(self, programs, anyy):
        """Store the configuration and open the Redis connection.

        :param programs: stored as ``self.programs``; not read anywhere else
            in this class.
        :param anyy: stored as ``self.any``; not read anywhere else in this
            class.
        """
        self.programs = programs
        self.any = anyy
        # Keep explicit handles to the standard streams instead of using
        # ``sys.*`` directly at the call sites.
        self.stdin = sys.stdin
        self.stdout = sys.stdout
        self.stderr = sys.stderr
        self.event_dict = {}
        self.con = get_redis_connection()

    def _get_now(self):
        """Return the current local time as ``(datetime, formatted string)``."""
        now = datetime.datetime.now()
        now_str = now.strftime(self._TIME_FORMAT)
        return now, now_str

    def _clean_event(self):
        """Drop remembered crashes that are older than 30 seconds.

        The values of ``event_dict`` are the timestamp strings written by
        ``runforever``, so they are parsed back into ``datetime`` objects
        before being compared with the current time.

        NOTE: nothing in this class calls this method; it is kept as a
        maintenance helper.
        """
        now = self._get_now()[0]
        expired = []
        for name, crashed_at in self.event_dict.items():
            crashed_time = datetime.datetime.strptime(
                crashed_at, self._TIME_FORMAT)
            # total_seconds() takes the ``days`` component into account;
            # ``timedelta.seconds`` alone wraps around every 24 hours.
            if (now - crashed_time).total_seconds() > self._EVENT_TTL_SECONDS:
                expired.append(name)
        # Remove after the scan so the dict is not mutated while iterating.
        for name in expired:
            self.event_dict.pop(name, None)

    def send_redis(self, content):
        """Serialize ``content`` as JSON and append it to the Redis list.

        :param content: JSON-serializable object (the callers in this class
            pass dicts with a ``content`` key and optionally ``monitor_list``).
        """
        data = json.dumps(content)
        self.con.rpush(self._REDIS_KEY, data)

    def _report_state_change(self, eventname, app_name):
        """Report a RUNNING or FATAL event; other event names send nothing.

        :param eventname: supervisor event name taken from the event headers.
        :param app_name: process name the event refers to.
        """
        now_str = self._get_now()[1]
        if eventname == 'PROCESS_STATE_RUNNING':
            # Only report a (re)start when a crash was recorded beforehand.
            if app_name in self.event_dict:
                crashed_at = self.event_dict.pop(app_name)
                msg = '[{}] 程序于 {} 发生异常中止,并在 {} 自动重启 成功'.format(
                    app_name, crashed_at, now_str)
                self.send_redis({'content': msg})
        elif eventname == 'PROCESS_STATE_FATAL':
            # Default text: start failure without a recorded crash.
            msg = '[{}] 程序于 {} 启动失败'.format(app_name, now_str)
            if app_name in self.event_dict:
                crashed_at = self.event_dict.pop(app_name)
                msg = '[{}] 程序于 {} 发生异常中止,并在 {} 自动重启 失败'.format(
                    app_name, crashed_at, now_str)
            self.send_redis(
                {'content': msg, 'monitor_list': ['disk', 'memory']})

    def _report_unexpected_exit(self, app_name):
        """Remember the crash time of ``app_name`` and push the crash alert."""
        self.stderr.write('unexpected exit, mailing\n')
        self.stderr.flush()
        crashed_at = self._get_now()[1]
        # Remembered so a later RUNNING / FATAL event can mention this crash.
        self.event_dict[app_name] = crashed_at
        msg = '[{}] 程序于 {} 发生异常中止'.format(app_name, crashed_at)
        self.send_redis({'content': msg, 'monitor_list': ['disk', 'memory']})

    def runforever(self, test=False):
        """Process supervisor events in an endless loop.

        For every event read via ``childutils.listener.wait``:

        * events for the ignored process name are only acknowledged;
        * events other than ``PROCESS_STATE_EXITED`` are passed to
          ``_report_state_change``;
        * ``PROCESS_STATE_EXITED`` events whose ``expected`` header is
          non-zero are only acknowledged, all others are reported as
          unexpected exits.

        Each event is acknowledged with ``childutils.listener.ok`` after it
        has been handled.

        :param test: accepted for interface compatibility; currently unused.
        """
        while True:
            headers, payload = childutils.listener.wait(
                self.stdin, self.stdout)
            pheaders, pdata = childutils.eventdata(payload + '\n')
            app_name = pheaders['processname']
            if app_name != self._IGNORED_PROCESS:
                eventname = headers['eventname']
                if eventname != 'PROCESS_STATE_EXITED':
                    self._report_state_change(eventname, app_name)
                elif not int(pheaders['expected']):
                    self._report_unexpected_exit(app_name)
            # Every handled event must be acknowledged exactly once.
            childutils.listener.ok(self.stdout)
def main():
    """Build the crash listener and hand control to its event loop.

    The listener is created with an empty program list and ``True`` as its
    second constructor argument.
    """
    listener = CrashExceptionWarning([], True)
    listener.runforever()


# Start the listener only when the file is executed as a script.
if __name__ == '__main__':
    main()
|