root 4 лет назад
Родитель
Сommit
6b9bb4dfd0

+ 2 - 1
.gitignore

@@ -57,4 +57,5 @@ docs/_build/
 
 # PyBuilder
 target/
-
+.idea/
+dump.rdb

+ 76 - 1
README.md

@@ -1,3 +1,78 @@
 # yufei_supervisord
+```
+云飞监控: 
 
-云飞运行状态监控工具
+	主程序:负责每两分钟(由crontab定时触发)自动读取配置文件,将监控目录下符合要求的文件自动加入supervisor
+	异常警告事件监控脚本:supervisor程序异常中止时候自动发送异常警告消息到redis中
+	内置监控程序:
+		redis监控:监控redis运行
+		钉钉推送:从redis中获取信息,并自动推送到钉钉
+		系统监控:监控服务器运行状态,内存,磁盘,每间隔1秒,自动存储到redis中
+
+	配置文件:
+		配置文件采用 YAML 格式的文本文件
+		development_env    测试环境配置文件目录
+		production_env     正式环境配置文件目录
+
+		两个目录中的文件名称和格式都一致,如下
+
+            monitor.yml   负责配置指定监控目录配置
+                格式如下:
+                    # 所监控的目录名称,目录下所有符合 filter 和 exclude规则的文件都自动加入supervisord管理器中
+                    '/root/project/yufei_supervisord/monitor_test':
+                      # 需要后台执行的python脚本的解释器绝对路径
+                      name: '本地测试'    # 此参数必须存在
+                      executable: '/root/project/yufei_supervisord/venv/bin/python'    # 此参数必须存在
+                      filter:  # 只匹配 *.py 和 *_monitor.py 的文件, 此参数必须存在
+                        - '*.py'
+                        - '*_monitor.py'
+                      exclude:    # 排除以__, ex_开头的文件, 此参数为可选,可以不需要
+                        - '__*'
+                        - 'ex_*'
+            redis.yml
+                负责配置redis相关配置
+                格式如下:
+                    connection:
+                      host: '127.0.0.1'
+                      port: 6379
+                      db: 6
+                      password: null      # 没有密码填写 null
+
+            dingding.yml
+                负责配置钉钉机器配置
+                格式如下:
+                    webhook: 'https://oapi.dingtalk.com/robot/send?access_token=11532b3909549951f493087e873ae603fda52011940da33c2a33992a4674bb20'
+                    secret: 'SECf2d2b723c60120b1dcd66da866423e33977d1bda555124b00505067e8282b1e2'
+
+
+使用方法:
+1. 首先需要安装redis数据库,配置文件存在/etc/redis.conf
+如果已有redis,则需要停止已有redis,云飞监控将托管redis,负责监控redis的运行状态与异常自动重启
+
+    redis负责存储需要推送到钉钉的消息
+    redis库默认采用6号数据库
+
+    key名              数据类型   描述信息                                                                             需要跟随主题同时发送的系统监控信息。
+    yf_dingding          list    负责存储需要发送到钉钉的消息信息 储存内容为json.dumps后的字符串 {'content': '消息主体', 'monitor_list': ['disk', 'memory']}
+    yf_sys_monitor       hash    负责储存系统监控信息,其中 disk key名 表示 磁盘监控信息, memory key名 表示 内存监控信息
+
+2. 在python虚拟环境中安装依赖库
+3. 配置对应的配置文件
+    	development_env    测试环境配置文件目录
+        production_env     正式环境配置文件目录
+4. 查看使用帮助 python start.py --help
+5. 使用刚才配置好的虚拟环境中的python运行start.py 启动监控
+6. 需要推送到钉钉群的其他程序
+	其他程序如果需要推送钉钉群,只需要将需要推送的消息按照指定格式存入redis,即可实时推送。
+	content : 消息主体
+        monitor_list: 表示发送钉钉消息时候,自动获取对应的系统监控信息, 不需要的话可以填写 空列表 []
+				  disk 表示磁盘信息
+				  memory 表示内存信息
+	例如:
+		content = {'content': '消息主体', 'monitor_list': ['disk', 'memory']}
+		data = json.dumps(content)
+		con = redis.Redis(host=host, port=port, db=db, password=password, decode_responses=True)
+		con.rpush('yf_dingding', data)
+
+	decode_responses=True 一定要开!!!!!
+```

+ 4 - 0
config/development_env/dingding.yml

@@ -0,0 +1,4 @@
+# 钉钉机器人配置文件
+
+webhook: 'https://oapi.dingtalk.com/robot/send?access_token=11532b3909549951f493087e873ae603fda52011940da33c2a33992a4674bb20'
+secret: 'SECf2d2b723c60120b1dcd66da866423e33977d1bda555124b00505067e8282b1e2'

+ 13 - 0
config/development_env/monitor.yml

@@ -0,0 +1,13 @@
+# 监控配置文件
+
+# 所监控的目录名称,目录下所有符合 filter 和 exclude的文件都自动加入supervisord管理器中
+'/root/project/yufei_supervisord/monitor_test':
+  # 需要后台执行的python脚本的解释器绝对路径
+  name: '本地测试'
+  executable: '/root/project/yufei_supervisord/venv/bin/python'
+  filter:  # 只匹配 *.py 和 *_monitor.py 的文件
+    - '*.py'
+    - '*_monitor.py'
+  exclude:    # 排除以__, ex_开头的文件
+    - '__*'
+    - 'ex_*'

+ 7 - 0
config/development_env/redis.yml

@@ -0,0 +1,7 @@
+# redis配置信息
+
+connection:
+  host: '127.0.0.1'
+  port: 6379
+  db: 6
+  password: null

+ 4 - 0
config/production_env/dingding.yml

@@ -0,0 +1,4 @@
+# 钉钉机器人配置文件
+
+webhook: 'https://oapi.dingtalk.com/robot/send?access_token=11532b3909549951f493087e873ae603fda52011940da33c2a33992a4674bb20'
+secret: 'SECf2d2b723c60120b1dcd66da866423e33977d1bda555124b00505067e8282b1e2'

+ 3 - 0
config/production_env/monitor.yml

@@ -0,0 +1,3 @@
+# 监控配置文件
+
+

+ 7 - 0
config/production_env/redis.yml

@@ -0,0 +1,7 @@
+# redis配置信息
+
+connection:
+  host: '127.0.0.1'
+  port: 6379
+  db: 6
+  password: null

+ 1 - 0
config/run_mode

@@ -0,0 +1 @@
+development

+ 6 - 0
monitor_test/ceshi.py

@@ -0,0 +1,6 @@
+# coding:utf-8
+
+import time
+# Test script for the monitored directory: print '1' every 3 seconds, forever.
+while True:
+    print('1')
+    time.sleep(3)

+ 18 - 0
requirements.txt

@@ -0,0 +1,18 @@
+appdirs==1.4.4
+certifi==2021.10.8
+charset-normalizer==2.0.9
+Deprecated==1.2.13
+DingtalkChatbot==1.5.3
+fs==2.4.13
+idna==3.3
+psutil==5.8.0
+python-crontab==2.5.1
+python-dateutil==2.8.1
+pytz==2021.3
+PyYAML==6.0
+redis==4.0.2
+requests==2.26.0
+six==1.16.0
+supervisor==4.2.2
+urllib3==1.26.7
+wrapt==1.13.3

+ 0 - 0
src/__init__.py


+ 0 - 0
src/build_monitor/__init__.py


+ 44 - 0
src/build_monitor/dingding_monitor.py

@@ -0,0 +1,44 @@
+# coding:utf-8
+
+'''自动推送消息到钉钉'''
+
+import os
+import sys
+import json
+import time
+
+from dingtalkchatbot.chatbot import DingtalkChatbot
+
+local_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+if local_path not in sys.path:
+    sys.path.append(local_path)
+
+from src.utils.comm_tools import get_redis_connection, get_dingding_config
+
+
+class DingDingUtils:
+    def __init__(self):
+        # 钉钉机器人地址和密钥
+        ding_config = get_dingding_config()
+        webhook = ding_config['webhook']
+        secret = ding_config['secret']
+        self.ding = DingtalkChatbot(webhook=webhook, secret=secret)
+
+    def run(self):
+        con = get_redis_connection()
+        while True:
+            v = con.blpop('yf_dingding')[1]
+            data = json.loads(v)
+            content, monitor_list = data['content'], data.get('monitor_list', [])
+            for k in monitor_list:
+                m = con.hget('yf_sys_monitor', k)
+                if m:
+                    content += '\n\n{}'.format(m)
+            self.ding.send_text(content)
+            time.sleep(0.6)
+
+
+# Script entry point: start the push loop.
+if __name__ == '__main__':
+    DingDingUtils().run()
+
+

+ 47 - 0
src/build_monitor/redis_monitor.py

@@ -0,0 +1,47 @@
+# coding:utf-8
+
+import os
+import sys
+import psutil
+import subprocess
+
+local_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+if local_path not in sys.path:
+    sys.path.append(local_path)
+
+from src.utils.comm_tools import shell_cmd
+
+
+def _has_redis():
+    is_redis = False
+    for proc in psutil.process_iter(['pid', 'name']):
+        if proc.name == 'redis-server':
+            is_redis = True
+            break
+    return is_redis
+
+
+def _ini_redis():
+    cmd = "which redis-server"
+    code, stdout, stderr = shell_cmd(cmd)
+    if not os.path.isfile(stdout):
+        subprocess.run("yum install -y redis", shell=True)
+
+
+def _start_redis():
+    cmd = ("echo 1024 > /proc/sys/net/core/somaxconn; "
+           "sysctl vm.overcommit_memory=1;echo never > /sys/kernel/mm/transparent_hugepage/enabled"
+           )
+    subprocess.run(cmd, shell=True)
+    subprocess.run("redis-server /etc/redis.conf", shell=True)
+
+
+def redis_deamond():
+    '''redis后台守护'''
+    if not _has_redis():
+        _ini_redis()
+        _start_redis()
+
+
+# Script entry point: run the redis guard once.
+if __name__ == '__main__':
+    redis_deamond()

+ 55 - 0
src/build_monitor/system_monitor.py

@@ -0,0 +1,55 @@
+# coding:utf-8
+
+
+'''系统监控,目前仅支持disk,memory监控'''
+
+import os
+import sys
+import json
+import time
+
+
+local_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+if local_path not in sys.path:
+    sys.path.append(local_path)
+
+
+from src.utils.comm_tools import shell_cmd, get_redis_connection
+
+
+class SystemdMonitor:
+    def __init__(self):
+        self.con = get_redis_connection()
+        self.table = 'yf_sys_monitor'
+
+    def get_memory_info(self):
+        cmd = "free -h"
+        code, stdout, stderr = shell_cmd(cmd)
+        if code == 0:
+            content = ' ' * 16 + stdout
+            self.con.hset(self.table, 'memory', content)
+
+    def get_disk_info(self):
+        cmd = "df -h"
+        code, stdout, stderr = shell_cmd(cmd)
+        if code == 0:
+            line_list = []
+            for line in stdout.splitlines():
+                temp_key = line.split()[0].strip()
+                if temp_key and 'tmpfs' in temp_key:
+                    continue
+                line_list.append(line)
+            content = '\n'.join(line_list)
+            self.con.hset(self.table, 'disk', content)
+
+    def run(self):
+        while True:
+            self.get_disk_info()
+            self.get_memory_info()
+            time.sleep(1)
+
+
+# Script entry point: start the sampling loop.
+if __name__ == "__main__":
+    SystemdMonitor().run()
+
+

+ 45 - 0
src/start.py

@@ -0,0 +1,45 @@
+# coding:utf-8
+import argparse
+import os
+import sys
+from argparse import ArgumentParser
+from crontab import CronTab
+
+
+class StartSupervisord:
+    def __init__(self):
+        self.cron = CronTab(user='root')
+        self.python_bin = sys.executable
+        self.main_file = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'yf_supervisord.py')
+
+    def init_crond(self):
+        cmd = "{} {}".format(self.python_bin, self.main_file)
+        for job in self.cron:
+            if job.command in cmd:
+                break
+        else:
+            new_job = self.cron.new(cmd, comment="supervise监控脚本,禁止修改")
+            new_job.minute.every(2)
+            self.cron.write()
+
+
+if __name__ == '__main__':
+    args = ArgumentParser(formatter_class=argparse.RawTextHelpFormatter, description=''' 云飞后台监控管理启动程序:
+    使用方法如下:
+        python start.py --mode  development
+''')
+    args.add_argument('--mode', type=str, choices=['development', 'production'], required=True, help='''
+development       开发模式
+production        生产模式
+''')
+    result = args.parse_args()
+    mode = result.mode
+
+    setting_path = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'config/run_mode')
+    with open(setting_path, 'w') as fw:
+        fw.write(mode)
+
+    StartSupervisord().init_crond()
+    print('******   开启程序成功,两分钟后自动运行supervisord   ******')
+
+

+ 0 - 0
src/utils/__init__.py


+ 85 - 0
src/utils/comm_tools.py

@@ -0,0 +1,85 @@
+# coding:utf-8
+
+import os
+import sys
+import yaml
+import redis
+import subprocess
+
+
+def _get_config_dir():
+    base_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'config')
+    mode_path = os.path.join(base_dir, 'run_mode')
+    with open(mode_path, 'r') as fr:
+        mode = fr.read().strip()
+    config_dir = os.path.join(base_dir, '{}_env'.format(mode))
+    return config_dir
+
+
+def _load_ymal(path):
+    with open(path, 'r') as fr:
+        data = yaml.load(fr, yaml.FullLoader)
+    return data
+
+
+def get_monitor_config():
+    '''获取监控配置'''
+    _config_dir = _get_config_dir()
+    monitor_path = os.path.join(_config_dir, 'monitor.yml')
+    monitor_data = _load_ymal(monitor_path)
+    build_dir = os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file__))), 'build_monitor')
+    monitor_data[build_dir] = {
+        'name': 'build',
+        'executable': sys.executable,
+        'filter': ['*_monitor.py'],
+        'exclude': []
+    }
+    return monitor_data
+
+
+def get_redis_connection():
+    '''获取redis连接'''
+    _config_dir = _get_config_dir()
+    redis_path = os.path.join(_config_dir, 'redis.yml')
+    result = _load_ymal(redis_path)
+    data = result['connection']
+    host, port, db = data['host'], int(data['port']), int(data['db'])
+    try:
+        password = data['password']
+    except KeyError as e:
+        password = None
+    connection = redis.Redis(host=host, port=port, db=db, password=password, decode_responses=True)
+    return connection
+
+
+def get_dingding_config():
+    _config_dir = _get_config_dir()
+    path = os.path.join(_config_dir, 'dingding.yml')
+    data = _load_ymal(path)
+    return data
+
+
+def shell_cmd(cmd, is_wait=True):
+    '''执行shell命令
+    :param is_wait 是否阻塞等待命令运行结束,Ture 表示阻塞模式
+    :return
+        code, stdout, stderr
+
+        code            命令运行状态码,0表示运行成功
+        stdout          命令运行输出结果
+        stderr          命令运行错误信息
+    '''
+    code, stdout, stderr = 0, '', ''
+    proc = subprocess.Popen(cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    if is_wait:
+        stdout, stderr = proc.communicate()
+        code = proc.returncode
+        stdout = stdout.decode('utf-8').strip()
+        stderr = stderr.decode('utf-8').strip()
+    return code, stdout, stderr
+
+
+# Manual check: build a redis client from the active configuration and print it.
+if __name__ == "__main__":
+    con = get_redis_connection()
+    print(con)
+

+ 115 - 0
src/utils/exception_warning.py

@@ -0,0 +1,115 @@
+# coding:utf-8
+
+'''
+异常警告事件监控脚本:supervisor程序异常中止时候自动发送异常警告消息到redis中
+auth : zhaiyifei
+time : 2021-12-14
+'''
+
+
+import os
+import sys
+import json
+import datetime
+from supervisor import childutils
+
+local_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+if local_path not in sys.path:
+    sys.path.append(local_path)
+
+from src.utils.comm_tools import get_redis_connection
+
+
+def usage(exitstatus=255):
+    sys.exit(exitstatus)
+
+
+class CrashExceptionWarning:
+    def __init__(self, programs, anyy):
+        self.programs = programs
+        self.any = anyy
+        self.stdin = sys.stdin
+        self.stdout = sys.stdout
+        self.stderr = sys.stderr
+        self.event_dict = {}
+        self.con = get_redis_connection()
+
+    def _get_now(self):
+        '''获取时间'''
+        now = datetime.datetime.now()
+        now_str = now.strftime('%Y-%m-%d %H:%M:%S')
+        return now, now_str
+
+    def _clean_event(self):
+        '''清理过期事件,超过30秒既为过期事件'''
+        now = self._get_now()[0]
+        key_list = []
+        for key, value in self.event_dict.items():
+            diff = now - value[0]
+            if diff.seconds > 30:
+                key_list.append(key_list)
+
+        for key in key_list:
+            if key in self.event_dict:
+                self.event_dict.pop(key)
+
+    def send_redis(self, content):
+        '''推送消息redis'''
+        data = json.dumps(content)
+        self.con.rpush('yf_dingding', data)
+
+    def runforever(self, test=False):
+        while True:
+            headers, payload = childutils.listener.wait(
+                self.stdin, self.stdout)
+            pheaders, pdata = childutils.eventdata(payload + '\n')
+
+            app_name = pheaders['processname']
+            if app_name == 'crasheding':
+                childutils.listener.ok(self.stdout)
+                continue
+
+            eventname = headers['eventname']
+            if eventname != 'PROCESS_STATE_EXITED':
+                now_str = self._get_now()[1]
+                if eventname == 'PROCESS_STATE_RUNNING':
+                    if app_name in self.event_dict:
+                        msg = '[{}] 程序于 {} 发生异常中止,并在 {} 自动重启 成功'.format(
+                            app_name, self.event_dict[app_name], now_str)
+                        self.event_dict.pop(app_name)
+                        self.send_redis({'content': msg})
+                elif eventname == 'PROCESS_STATE_FATAL':
+                    msg = '[{}] 程序于 {} 启动失败'.format(app_name, now_str)
+                    if app_name in self.event_dict:
+                        msg = '[{}] 程序于 {} 发生异常中止,并在 {} 自动重启 失败'.format(
+                            app_name, self.event_dict[app_name], now_str)
+                        self.event_dict.pop(app_name)
+                    self.send_redis({'content': msg, 'monitor_list': ['disk', 'memory']})
+
+                childutils.listener.ok(self.stdout)
+                continue
+
+            if int(pheaders['expected']):
+                childutils.listener.ok(self.stdout)
+                continue
+
+            self.stderr.write('unexpected exit, mailing\n')
+            self.stderr.flush()
+
+            self.event_dict[app_name] = self._get_now()[1]
+            msg = '[{}] 程序于 {} 发生异常中止'.format(
+                app_name, self.event_dict[app_name], self._get_now()[1])
+            self.send_redis({'content': msg, 'monitor_list': ['disk', 'memory']})
+
+            childutils.listener.ok(self.stdout)
+
+
+def main():
+    programs = []
+    prog = CrashExceptionWarning(programs, True)
+    prog.runforever()
+
+
+# Script entry point: start the listener loop.
+if __name__ == '__main__':
+    main()
+

+ 218 - 0
src/yf_supervisord.py

@@ -0,0 +1,218 @@
+# coding:utf-8
+
+import os
+import sys
+import fs
+import psutil
+import subprocess
+from utils.comm_tools import get_monitor_config, shell_cmd
+
+
+class SupervisorTools:
+    def __init__(self):
+        temp_dir = os.path.dirname(os.path.abspath(__file__))
+        utils_dir = os.path.join(temp_dir, 'utils')
+        self.exception_warning_file = os.path.join(utils_dir, 'exception_warning.py')
+        self.config_base = '/data/supervisor'
+        self.base_config_file = '/etc/supervisor/supervisord.conf'
+        self.python_bin = sys.executable
+        self.bin_dir = os.path.dirname(self.python_bin)
+        self.supervisord = os.path.join(self.bin_dir, 'supervisord')
+        self.supervisorctl = os.path.join(self.bin_dir, 'supervisorctl')
+        self.config_dir = os.path.join(self.config_base, 'config.d')
+        self.log_dir = os.path.join(self.config_base, 'log')
+
+        tmp_dir = os.path.dirname(self.base_config_file)
+        if not os.path.isdir(tmp_dir):
+            os.makedirs(tmp_dir)
+        if not os.path.isdir(self.config_dir):
+            os.makedirs(self.config_dir)
+        if not os.path.isdir(self.log_dir):
+            os.makedirs(self.log_dir)
+
+    def _get_supervisor_app(self):
+        '''获取supervisor所管理的程序'''
+        status, app_name_list = False, []
+        cmd = '{} status'.format(self.supervisorctl)
+        code, stdout, stderr = shell_cmd(cmd)
+
+        if code >= 0:
+            for line in stdout.splitlines():
+                tmp_line = line.split()
+                if len(tmp_line) > 1:
+                    app_name = tmp_line[0].strip()
+                    app_name_list.append(app_name)
+            status, msg = True, set(app_name_list)
+        else:
+            msg = '获取supervisor所管理的程序失败: {}'.format(stderr)
+        return status, msg
+
+    def _make_task_conf(self, app_name, dir_name, cmd):
+        '''Write the supervisor program config for one script.
+
+        The file goes to ``<config_dir>/<app_name>.conf``; the program's log is placed in
+        ``<log_dir>/<app_name>/<app_name>.log`` (the log directory is created if missing).
+
+        :param app_name  supervisor program name, also used for the file names
+        :param dir_name  value written to ``directory=``
+        :param cmd       value written to ``command=``
+        '''
+        log_dir = os.path.join(self.log_dir, app_name)
+        if not os.path.isdir(log_dir):
+            os.makedirs(log_dir)
+        conf_path = os.path.join(self.config_dir, '{}.conf'.format(app_name))
+        log_file = os.path.join(log_dir, '{}.log'.format(app_name))
+
+        # Template text is emitted verbatim; only the four {placeholders} are substituted.
+        conf_str = '''
+#程序唯一名称
+[program:{app_name}]
+
+#程序路径                                       
+directory={dir_name}
+
+#运行程序的命令                                       
+command={cmd}
+
+#标准日志输出位置
+stdout_logfile={log_file}
+
+autostart=true                          #supervisord启动后,同时启动此程序                                  
+startsecs=5                             #启动5秒后没有异常退出,就表示进程正常启动了,默认为1秒
+autorestart=true                        #程序退出后自动重启
+startretries=3                          #启动失败自动重试次数,默认是3                   
+user=root                               #用哪个用户启动进程,默认是root                                      
+redirect_stderr=true                    #把stderr重定向到stdout标准输出,默认false            
+stdout_logfile_maxbytes=50MB            #stdout标准输出日志文件大小,日志文件备份数
+stdout_logfile_backups=10
+stopasgroup=true                        # 停止或终止进程时,同时结束进程包含的子进程 
+killasgroup=true
+
+'''.format(app_name=app_name, dir_name=dir_name, cmd=cmd, log_file=log_file)
+        with open(conf_path, 'w') as fw:
+            fw.write(conf_str)
+
+    def _make_supervisor_conf(self):
+        '''Write the main supervisord config to ``self.base_config_file``.
+
+        The config registers exception_warning.py (run with ``python -u``) as the
+        ``crash_warning`` event listener for PROCESS_STATE events and includes every
+        ``*.conf`` under /data/supervisor/config.d.
+        '''
+        # Command line of the event listener, substituted into the template below.
+        cmd = '{} -u {}'.format(self.python_bin, self.exception_warning_file)
+        # NOTE(review): the /data/supervisor paths in the template are literals; they match
+        # self.config_base only as long as that attribute keeps its current value.
+        conf = '''
+[unix_http_server]
+file=/data/supervisor/supervisor.sock   ; the path to the socket file
+;chmod=0700                 ; socket file mode (default 0700)
+;chown=nobody:nogroup       ; socket file uid:gid owner
+;username=user              ; default is no username (open server)
+;password=123               ; default is no password (open server)
+
+;[inet_http_server]         ; inet (TCP) server disabled by default
+;port=127.0.0.1:9001        ; ip_address:port specifier, *:port for all iface
+;username=user              ; default is no username (open server)
+;password=123               ; default is no password (open server)
+
+[supervisord]
+logfile=/data/supervisor/supervisord.log ; main log file; default $CWD/supervisord.log
+logfile_maxbytes=50MB        ; max main logfile bytes b4 rotation; default 50MB
+logfile_backups=10           ; # of main logfile backups; 0 means none, default 10
+loglevel=info                ; log level; default info; others: debug,warn,trace
+pidfile=/data/supervisor/supervisord.pid ; supervisord pidfile; default supervisord.pid
+nodaemon=false               ; start in foreground if true; default false
+silent=false                 ; no logs to stdout if true; default false
+minfds=1024                  ; min. avail startup file descriptors; default 1024
+minprocs=200                 ; min. avail process descriptors;default 200
+;umask=022                   ; process file creation umask; default 022
+user=root                    ; setuid to this UNIX account at startup; recommended if root
+;identifier=supervisor       ; supervisord identifier, default is 'supervisor'
+;directory=/tmp              ; default is not to cd during start
+;nocleanup=true              ; don't clean up tempfiles at start; default false
+;childlogdir=/tmp            ; 'AUTO' child log dir, default $TEMP
+;environment=KEY="value"     ; key value pairs to add to environment
+;strip_ansi=false            ; strip ansi escape codes in logs; def. false
+
+[rpcinterface:supervisor]
+supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
+
+[supervisorctl]
+serverurl=unix:///data/supervisor/supervisor.sock
+;username=chris              ; should be same as in [*_http_server] if set
+;password=123                ; should be same as in [*_http_server] if set
+;prompt=mysupervisor         ; cmd line prompt (default "supervisor")
+;history_file=~/.sc_history  ; use readline history if available
+
+[eventlistener:crash_warning]
+command={exception_warning_file}
+events=PROCESS_STATE
+redirect_stderr=false
+
+[include]
+files = /data/supervisor/config.d/*.conf
+
+'''.format(exception_warning_file=cmd)
+        with open(self.base_config_file, 'w') as fw:
+            fw.write(conf)
+
+    def _update_task(self):
+        '''启动添加的程序'''
+        status, msg = True, 'supervisor管理器启动程序成功'
+        cmd = '{} update all'.format(self.supervisorctl)
+        code, stdout, stderr = shell_cmd(cmd)
+        if code != 0:
+            status, msg = False, 'supervisor管理器启动程序失败:{}\n{}'.format(stdout, stderr)
+        return status, msg
+
+    def make_task(self):
+        '''创建supervisor任务并启动'''
+        status, msg = self._get_supervisor_app()
+        if not status:
+            return status, msg
+        app_name_set, app_name_list = msg, []
+        monitor_config = get_monitor_config()
+        with fs.open_fs('/') as root_fs:
+            for script_dir, item in monitor_config.items():
+                if not os.path.isdir(script_dir):
+                    continue
+                file_filter, file_exclude = item['filter'], item.get('exclude', [])
+                python_bin, env_name = item['executable'], item['name']
+                if not file_exclude:
+                    file_exclude = None
+                if not file_filter:
+                    file_filter = None
+                for path in root_fs.walk.files(script_dir, filter=file_filter, exclude=file_exclude, max_depth=1):
+                    dir_name, file_name = os.path.split(path)
+                    index = file_name.rfind('.')
+                    app_name = file_name[:index] if index != -1 else file_name
+                    app_name = "{}_{}".format(env_name, app_name)
+                    if app_name in app_name_set:
+                        continue
+                    cmd = '{} -u {}'.format(python_bin, path)
+                    self._make_task_conf(app_name=app_name, dir_name=dir_name, cmd=cmd)
+                    app_name_list.append(app_name)
+
+        status, msg = True, 'supervisor管理器启动程序成功'
+        if app_name_list:
+            status, msg = self._update_task()
+        return status, msg
+
+    def _check_supervisord(self):
+        '''	检测supervisor管理器进程是否运行'''
+        is_run = False
+        for proc in psutil.process_iter():
+            proc_name = proc.name().lower()
+            if proc_name == 'supervisord':
+                is_run = True
+                break
+        return is_run
+
+    def start_supervisord(self):
+        '''启动supervisor管理器'''
+        msg = ''
+        if not os.path.isfile(self.supervisord):
+            msg = 'supervisor未安装,请先安装supervisor'
+        else:
+            is_run = self._check_supervisord()
+            if not is_run:
+                self._make_supervisor_conf()
+                cmd = "{} -c {}".format(self.supervisord, self.base_config_file)
+                result = subprocess.run(cmd, shell=True)
+                if result.returncode == 0:
+                    is_run = True
+                else:
+                    msg = '启动supervisor管理器失败, 请检查'
+
+            if is_run:
+                status, msg = self.make_task()
+        return msg
+
+
+# Script entry point (the command start.py schedules in cron): make sure supervisord is
+# running, register new scripts, and print the resulting message.
+if __name__ == '__main__':
+    st = SupervisorTools()
+    msg = st.start_supervisord()
+    print(msg)