|
|
|
|
@ -6,7 +6,6 @@ import logging
@@ -6,7 +6,6 @@ import logging
|
|
|
|
|
import os |
|
|
|
|
if platform.system() == 'Windows': |
|
|
|
|
#线上正式运行 |
|
|
|
|
|
|
|
|
|
#本地测试 |
|
|
|
|
#sys.path.append('libs') |
|
|
|
|
# 判断是否存在libs目录 |
|
|
|
|
@ -14,14 +13,9 @@ if platform.system() == 'Windows':
@@ -14,14 +13,9 @@ if platform.system() == 'Windows':
|
|
|
|
|
sys.path.append('e:\\libs\\') |
|
|
|
|
else: |
|
|
|
|
sys.path.append('libs') |
|
|
|
|
|
|
|
|
|
else: |
|
|
|
|
sys.path.append('/data/deploy/make3d/make2/libs/') |
|
|
|
|
|
|
|
|
|
import config |
|
|
|
|
|
|
|
|
|
notifyUserIds = ["DongZhangXi"] |
|
|
|
|
|
|
|
|
|
#公共连接库 |
|
|
|
|
def pymysqlAlias(): |
|
|
|
|
return pymysql.connect( |
|
|
|
|
@ -31,13 +25,13 @@ def pymysqlAlias():
@@ -31,13 +25,13 @@ def pymysqlAlias():
|
|
|
|
|
password=config.mysql_local['password'], |
|
|
|
|
db=config.mysql_local['db'], |
|
|
|
|
charset=config.mysql_local['charset'],) |
|
|
|
|
|
|
|
|
|
#消息通知 |
|
|
|
|
def notify(content): |
|
|
|
|
|
|
|
|
|
if content == "": |
|
|
|
|
return "content 不能为空" |
|
|
|
|
|
|
|
|
|
for user_agent_id in notifyUserIds: |
|
|
|
|
for user_agent_id in config.notify_user_Ids: |
|
|
|
|
data = { |
|
|
|
|
'userId': user_agent_id, |
|
|
|
|
'message': content, |
|
|
|
|
@ -46,9 +40,6 @@ def notify(content):
@@ -46,9 +40,6 @@ def notify(content):
|
|
|
|
|
message_send_url = "https://mp.api.suwa3d.com/api/qyNotify/sendMessage?userId="+user_agent_id+"&message="+content |
|
|
|
|
response = requests.post(message_send_url, data=json.dumps(data), headers=headers) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#检测 task_distributed 有哪些任务是卡住时间很长没处理的 |
|
|
|
|
def check_task_distributed_detail(): |
|
|
|
|
try: |
|
|
|
|
@ -67,31 +58,30 @@ def check_task_distributed_detail():
@@ -67,31 +58,30 @@ def check_task_distributed_detail():
|
|
|
|
|
#判断是否有值 |
|
|
|
|
if len(result) == 0: |
|
|
|
|
return "no" |
|
|
|
|
|
|
|
|
|
#当前正在运行的任务数量 |
|
|
|
|
nowTaskNums = len(result) |
|
|
|
|
#遍历循环每个任务对应的步骤已经执行多长时间了 |
|
|
|
|
for row in result: |
|
|
|
|
|
|
|
|
|
taskData = get_task_distributed_by_id(row["task_distributed_id"]) |
|
|
|
|
if taskData == "error": |
|
|
|
|
notify(f'task_distributed_id{row["task_distributed_id"]}的数据异常') |
|
|
|
|
continue |
|
|
|
|
|
|
|
|
|
if row["step"] == "step1": |
|
|
|
|
#判断是否超过了30=分钟 |
|
|
|
|
if (time.time() - time.mktime(row["started_at"].timetuple())) > 180: |
|
|
|
|
#判断是否超过了3分钟 |
|
|
|
|
if (time.time() - time.mktime(row["started_at"].timetuple())) > get_time_out(row["hostname"],"step1"): |
|
|
|
|
#发送消息通知 |
|
|
|
|
notify(f'{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}-任务{taskData["task_key"]}的{row["step"]}步骤已经超运行太久了,当前还有{waitNums}个任务未完成') |
|
|
|
|
|
|
|
|
|
if row["step"] == "step2": |
|
|
|
|
#判断是否超过了10分钟 |
|
|
|
|
if (time.time() - time.mktime(row["started_at"].timetuple())) > 60*10: |
|
|
|
|
if (time.time() - time.mktime(row["started_at"].timetuple())) > get_time_out(row["hostname"],"step2"): |
|
|
|
|
#发送消息通知 |
|
|
|
|
notify(f'{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}-任务{taskData["task_key"]}的{row["step"]}步骤已经超运行太久了,当前还有{waitNums}个任务未完成') |
|
|
|
|
|
|
|
|
|
if row["step"] == "step3": |
|
|
|
|
#判断是否超过了10分钟 |
|
|
|
|
if (time.time() - time.mktime(row["started_at"].timetuple())) > 60*8: |
|
|
|
|
if (time.time() - time.mktime(row["started_at"].timetuple())) > get_time_out(row["hostname"],"step3"): |
|
|
|
|
#发送消息通知 |
|
|
|
|
notify(f'{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}-任务{taskData["task_key"]}的{row["step"]}步骤已经超运行太久了,当前还有{waitNums}个任务未完成') |
|
|
|
|
return "no" |
|
|
|
|
@ -118,12 +108,11 @@ def check_task():
@@ -118,12 +108,11 @@ def check_task():
|
|
|
|
|
#判断是否有值 |
|
|
|
|
if len(result) == 0: |
|
|
|
|
return "no" |
|
|
|
|
|
|
|
|
|
nowTaskNums = len(result) |
|
|
|
|
#遍历循环每个任务对应的步骤已经执行多长时间了 |
|
|
|
|
for row in result: |
|
|
|
|
#判断是否超过了30=分钟 |
|
|
|
|
if (time.time() - time.mktime(row["created_at"].timetuple())) > 60*25: |
|
|
|
|
#判断是否超过了40=分钟 |
|
|
|
|
if (time.time() - time.mktime(row["created_at"].timetuple())) > get_time_out(row["hostname"],"all"): |
|
|
|
|
#发送消息通知 |
|
|
|
|
notify(f'{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}-任务{row["task_key"]}已经超运行超过25分钟了,当前还有{waitNums}个任务未完成') |
|
|
|
|
return "no" |
|
|
|
|
@ -152,6 +141,19 @@ def get_task_distributed_by_id(id):
@@ -152,6 +141,19 @@ def get_task_distributed_by_id(id):
|
|
|
|
|
logging.error(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())} 执行get_task_distributed_by_id异常: {str(e)}") |
|
|
|
|
return 'error' |
|
|
|
|
|
|
|
|
|
#根据主机获取任务运行超时时间 |
|
|
|
|
def get_time_out(hostname,step): |
|
|
|
|
arrTimeOut = config.task_run_timeout |
|
|
|
|
if hostname not in arrTimeOut: |
|
|
|
|
return "" |
|
|
|
|
|
|
|
|
|
if step not in arrTimeOut[hostname]: |
|
|
|
|
return "" |
|
|
|
|
|
|
|
|
|
return arrTimeOut[hostname][step] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#程序主入口 |
|
|
|
|
if __name__ == '__main__': |
|
|
|
|
#开启死循环 |
|
|
|
|
@ -160,4 +162,5 @@ if __name__ == '__main__':
@@ -160,4 +162,5 @@ if __name__ == '__main__':
|
|
|
|
|
check_task() |
|
|
|
|
#两分钟检测一次 |
|
|
|
|
time.sleep(120) |
|
|
|
|
print("检测是否存在运行时间超长的任务,进行消息通知") |
|
|
|
|
|