From 16e0afc0fe52a79b730513f8e15454c4c8935cf2 Mon Sep 17 00:00:00 2001 From: dongchangxi <458593490@qq.com> Date: Wed, 11 Oct 2023 09:59:19 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=9A=E6=97=B6=E6=9F=A5=E8=AF=A2=E8=B6=85?= =?UTF-8?q?=E6=97=B6=E4=BB=BB=E5=8A=A1=E7=9A=84=E4=BF=AE=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- timer/check_task.py | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/timer/check_task.py b/timer/check_task.py index cc0dea0..31eca16 100644 --- a/timer/check_task.py +++ b/timer/check_task.py @@ -6,7 +6,6 @@ import logging import os if platform.system() == 'Windows': #线上正式运行 - #本地测试 #sys.path.append('libs') # 判断是否存在libs目录 @@ -14,14 +13,9 @@ if platform.system() == 'Windows': sys.path.append('e:\\libs\\') else: sys.path.append('libs') - else: sys.path.append('/data/deploy/make3d/make2/libs/') - import config - -notifyUserIds = ["DongZhangXi"] - #公共连接库 def pymysqlAlias(): return pymysql.connect( @@ -31,13 +25,13 @@ def pymysqlAlias(): password=config.mysql_local['password'], db=config.mysql_local['db'], charset=config.mysql_local['charset'],) - +#消息通知 def notify(content): if content == "": return "content 不能为空" - for user_agent_id in notifyUserIds: + for user_agent_id in config.notify_user_Ids: data = { 'userId': user_agent_id, 'message': content, @@ -45,9 +39,6 @@ def notify(content): headers = {'Content-Type': 'application/json'} message_send_url = "https://mp.api.suwa3d.com/api/qyNotify/sendMessage?userId="+user_agent_id+"&message="+content response = requests.post(message_send_url, data=json.dumps(data), headers=headers) - - - #检测 task_distributed 有哪些任务是卡住时间很长没处理的 def check_task_distributed_detail(): @@ -67,31 +58,30 @@ def check_task_distributed_detail(): #判断是否有值 if len(result) == 0: return "no" - + #当前正在运行的任务数量 nowTaskNums = len(result) #遍历循环每个任务对应的步骤已经执行多长时间了 for row in result: - taskData = get_task_distributed_by_id(row["task_distributed_id"]) if taskData == "error": notify(f'task_distributed_id{row["task_distributed_id"]}的数据异常') continue if row["step"] == "step1": - #判断是否超过了30=分钟 - if (time.time() - time.mktime(row["started_at"].timetuple())) > 180: + #判断是否超过了3分钟 + if (time.time() - time.mktime(row["started_at"].timetuple())) > get_time_out(row["hostname"],"step1"): #发送消息通知 notify(f'{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}-任务{taskData["task_key"]}的{row["step"]}步骤已经超运行太久了,当前还有{waitNums}个任务未完成') if row["step"] == "step2": #判断是否超过了10分钟 - if (time.time() - time.mktime(row["started_at"].timetuple())) > 60*10: + if (time.time() - time.mktime(row["started_at"].timetuple())) > get_time_out(row["hostname"],"step2"): #发送消息通知 notify(f'{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}-任务{taskData["task_key"]}的{row["step"]}步骤已经超运行太久了,当前还有{waitNums}个任务未完成') if row["step"] == "step3": #判断是否超过了10分钟 - if (time.time() - time.mktime(row["started_at"].timetuple())) > 60*8: + if (time.time() - time.mktime(row["started_at"].timetuple())) > get_time_out(row["hostname"],"step3"): #发送消息通知 notify(f'{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}-任务{taskData["task_key"]}的{row["step"]}步骤已经超运行太久了,当前还有{waitNums}个任务未完成') return "no" @@ -118,12 +108,11 @@ def check_task(): #判断是否有值 if len(result) == 0: return "no" - nowTaskNums = len(result) #遍历循环每个任务对应的步骤已经执行多长时间了 for row in result: - #判断是否超过了30=分钟 - if (time.time() - time.mktime(row["created_at"].timetuple())) > 60*25: + #判断是否超过了40=分钟 + if (time.time() - time.mktime(row["created_at"].timetuple())) > get_time_out(row["hostname"],"all"): #发送消息通知 notify(f'{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}-任务{row["task_key"]}已经超运行超过25分钟了,当前还有{waitNums}个任务未完成') return "no" @@ -152,6 +141,19 @@ def get_task_distributed_by_id(id): logging.error(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())} 执行get_task_distributed_by_id异常: {str(e)}") return 'error' +#根据主机获取任务运行超时时间 +def get_time_out(hostname,step): + arrTimeOut = config.task_run_timeout + if hostname not in arrTimeOut: + return "" + + if step not in arrTimeOut[hostname]: + return "" + + return arrTimeOut[hostname][step] + + + #程序主入口 if __name__ == '__main__': #开启死循环 @@ -160,4 +162,5 @@ if __name__ == '__main__': check_task() #两分钟检测一次 time.sleep(120) + print("检测是否存在运行时间超长的任务,进行消息通知") \ No newline at end of file