# coding:utf-8
'''
exception_warning.py

Abnormal-termination alert monitor: when a program managed by supervisor
exits abnormally, automatically push a warning message to redis.
author : zhaiyifei
time : 2021-12-14
'''
  7. import os
  8. import sys
  9. import json
  10. import datetime
  11. from supervisor import childutils
  12. local_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  13. if local_path not in sys.path:
  14. sys.path.append(local_path)
  15. from monitor.utils.comm_tools import get_redis_connection
  16. def usage(exitstatus=255):
  17. sys.exit(exitstatus)
  18. class CrashExceptionWarning:
  19. def __init__(self, programs, anyy):
  20. self.programs = programs
  21. self.any = anyy
  22. self.stdin = sys.stdin
  23. self.stdout = sys.stdout
  24. self.stderr = sys.stderr
  25. self.event_dict = {}
  26. self.con = get_redis_connection()
  27. def _get_now(self):
  28. '''获取时间'''
  29. now = datetime.datetime.now()
  30. now_str = now.strftime('%Y-%m-%d %H:%M:%S')
  31. return now, now_str
  32. def _clean_event(self):
  33. '''清理过期事件,超过30秒既为过期事件'''
  34. now = self._get_now()[0]
  35. key_list = []
  36. for key, value in self.event_dict.items():
  37. diff = now - value[0]
  38. if diff.seconds > 30:
  39. key_list.append(key_list)
  40. for key in key_list:
  41. if key in self.event_dict:
  42. self.event_dict.pop(key)
  43. def send_redis(self, content):
  44. '''推送消息redis'''
  45. data = json.dumps(content)
  46. self.con.rpush('yf_dingding', data)
  47. def runforever(self, test=False):
  48. while True:
  49. headers, payload = childutils.listener.wait(
  50. self.stdin, self.stdout)
  51. pheaders, pdata = childutils.eventdata(payload + '\n')
  52. app_name = pheaders['processname']
  53. if app_name == 'crasheding':
  54. childutils.listener.ok(self.stdout)
  55. continue
  56. eventname = headers['eventname']
  57. if eventname != 'PROCESS_STATE_EXITED':
  58. now_str = self._get_now()[1]
  59. if eventname == 'PROCESS_STATE_RUNNING':
  60. if app_name in self.event_dict:
  61. msg = '[{}] 程序于 {} 发生异常中止,并在 {} 自动重启 成功'.format(
  62. app_name, self.event_dict[app_name], now_str)
  63. self.event_dict.pop(app_name)
  64. self.send_redis({'content': msg})
  65. elif eventname == 'PROCESS_STATE_FATAL':
  66. msg = '[{}] 程序于 {} 启动失败'.format(app_name, now_str)
  67. if app_name in self.event_dict:
  68. msg = '[{}] 程序于 {} 发生异常中止,并在 {} 自动重启 失败'.format(
  69. app_name, self.event_dict[app_name], now_str)
  70. self.event_dict.pop(app_name)
  71. self.send_redis({'content': msg, 'monitor_list': ['disk', 'memory']})
  72. childutils.listener.ok(self.stdout)
  73. continue
  74. if int(pheaders['expected']):
  75. childutils.listener.ok(self.stdout)
  76. continue
  77. self.stderr.write('unexpected exit, mailing\n')
  78. self.stderr.flush()
  79. self.event_dict[app_name] = self._get_now()[1]
  80. msg = '[{}] 程序于 {} 发生异常中止'.format(
  81. app_name, self.event_dict[app_name], self._get_now()[1])
  82. self.send_redis({'content': msg, 'monitor_list': ['disk', 'memory']})
  83. childutils.listener.ok(self.stdout)
  84. def main():
  85. programs = []
  86. prog = CrashExceptionWarning(programs, True)
  87. prog.runforever()
  88. if __name__ == '__main__':
  89. main()