# coding: utf-8
"""Copy the relevant slice of raw test recordings into Excel result workbooks.

The working directory is expected to contain three result workbooks whose
names start with ``load33``/``load66``/``load100`` and a set of raw-data
directories named ``loadAA_speedBB_reachCC`` (or ``loadAA_reachBB_speedCC``),
each holding three ``.data``/``.csv`` recordings.  For every recording the
code locates the row where the velocity column last matched the expected
maximum speed and copies velocity/torque samples from there into the sheet
that corresponds to the directory's reach/speed and the file's position.
"""
from os import scandir
from os.path import basename, dirname, exists, isfile
from sys import argv
from openpyxl import load_workbook
from time import time, sleep, strftime, localtime
from threading import Thread
from pandas import read_csv

_LOADS = ('load33', 'load66', 'load100')
_SPEEDS = ('speed33', 'speed66', 'speed100')
_REACHES = ('reach33', 'reach66', 'reach100')
_DATA_SUFFIXES = ('.data', '.csv')

# Flag polled by w2t_local(): 0 keeps the progress ticker running, anything
# else stops it.  Defined here so the ticker also works when the module is
# imported rather than executed as a script.
stop = 0


def _leaf_name(path):
    """Return the last component of *path* (file or directory name)."""
    return basename(path)


def _parent_name(path):
    """Return the name of the directory that directly contains *path*."""
    return basename(dirname(path))


class GetThreadResult(Thread):
    """Thread that remembers what its target returned (or raised).

    Attributes:
        result: return value of ``func(*args)``; stays ``0`` if the call
            never completed.
        error: the exception raised by ``func``, or ``None``.
    """

    def __init__(self, func, args=()):
        super().__init__()
        self.func = func
        self.args = args
        self.result = 0
        self.error = None

    def run(self):
        # The one-second delay is kept from the original implementation.
        sleep(1)
        try:
            self.result = self.func(*self.args)
        except Exception as err:
            # Remember the failure for the caller, then re-raise so the
            # default threading excepthook still reports it.
            self.error = err
            raise

    def get_result(self):
        """Wait for the thread to finish and return ``self.result``."""
        self.join()
        return self.result


def _run_with_progress(func, args, w2t):
    """Run ``func(*args)`` in a worker thread while a ticker prints dots.

    Returns the value returned by *func*.  An exception raised by *func* is
    re-raised in the calling thread instead of being lost in the worker.
    """
    global stop
    stop = 0
    worker = GetThreadResult(func, args=args)
    ticker = Thread(target=w2t_local, args=('.', 1, w2t))
    worker.start()
    ticker.start()
    try:
        worker.join()
    finally:
        # Joining the ticker (instead of sleeping a fixed time) guarantees
        # its last dot is written before the caller prints anything else.
        stop = 1
        ticker.join()
    if worker.error is not None:
        raise worker.error
    return worker.result


def data_process(result_file, raw_data_dirs, av, rr, axis, vel, trq, w2t, rpm):
    """Fill one result workbook from every raw-data directory that matches it.

    A directory matches when the part of its name before the first ``_``
    equals that of the workbook's file name.  The first three files of each
    matching directory are processed in parallel threads, numbered 1..3.

    Args:
        result_file: path of the ``.xlsx`` result workbook (loaded and saved
            in place).
        raw_data_dirs: paths of all raw-data directories.
        av, rr, axis, vel, trq, rpm: passed through to single_file_process().
        w2t: output callback, called as ``w2t(msg)`` or ``w2t(msg, 1)``.

    Raises:
        IndexError: a matching directory holds fewer than three files.
        Exception: whatever loading or saving the workbook raised.
    """
    file_name = _leaf_name(result_file)
    w2t(f"打开文件 {file_name} 需要 1min 左右", 1)
    wb_result = _run_with_progress(load_workbook, (result_file, ), w2t)
    w2t('')

    prefix = file_name.split('_')[0]
    for raw_data_dir in raw_data_dirs:
        if _leaf_name(raw_data_dir).split('_')[0] != prefix:
            continue
        now_doing_msg(raw_data_dir, 'start', w2t)
        _, data_files = traversal_files(raw_data_dir, w2t)
        # One thread per data file; the 1-based position selects the target
        # sheet (see find_result_sheet_name).  Running the same three calls
        # one after another is the equivalent serial mode.
        threads = [
            Thread(target=single_file_process,
                   args=(data_files[count - 1], wb_result, count,
                         av, rr, axis, vel, trq, w2t, rpm))
            for count in (1, 2, 3)
        ]
        for thread in threads:
            thread.start()
        for thread in threads:
            thread.join()
        now_doing_msg(raw_data_dir, 'done', w2t)

    now_doing_msg(result_file, 'done', w2t)
    w2t(f"保存文件 {file_name} 需要 1min 左右", 1)
    _run_with_progress(wb_result.save, (result_file, ), w2t)
    w2t('\n')


def _is_valid_dir_name(components):
    """Check the ``_``-separated parts of a raw-data directory name.

    Valid names have exactly three parts: a load part first, followed by one
    speed part and one reach part in either order.
    """
    if len(components) != 3:
        return False
    load, second, third = components
    if load not in _LOADS:
        return False
    return (second in _SPEEDS and third in _REACHES) or \
        (second in _REACHES and third in _SPEEDS)


def check_files(raw_data_dirs, result_files, w2t):
    """Validate the naming and layout of result files and raw-data directories.

    Every violation is reported through *w2t*; the closing "no problems
    found" line is only written when nothing was reported.

    Args:
        raw_data_dirs: paths of the raw-data directories.
        result_files: paths of the result workbooks.
        w2t: output callback.

    Returns:
        True when all checks passed, False otherwise.
    """
    ok = True

    if len(result_files) != 3:
        ok = False
        for result_file in result_files:
            w2t(result_file)
        w2t("结果文件数目错误,结果文件有且只有三个,请确认!")

    prefixes = sorted(_leaf_name(result_file).split('_')[0] for result_file in result_files)
    if prefixes != sorted(_LOADS):
        ok = False
        wd = dirname(result_files[0]) if result_files else ''
        msg = f"请关闭所有相关数据文件,并检查工作目录 {wd} 下,有且只允许有类似如下三个文件: " \
              f"1. load33_自研_制动性能测试.xlsx " \
              f"2. load66_自研_制动性能测试.xlsx " \
              f"3. load100_自研_制动性能测试.xlsx"
        w2t(msg)

    for raw_data_dir in raw_data_dirs:
        if not _is_valid_dir_name(_leaf_name(raw_data_dir).split('_')):
            ok = False
            msg = f"报错信息:数据目录 {raw_data_dir} 命名不合规,请参考如下形式\n" \
                  f"命名规则:\n 1. loadAA_speedBB_reachCC\n 2. loadAA_reachBB_speedCC\n" \
                  f"规则解释:AA/BB/CC 指的是负载/速度/臂展的比例\n" \
                  f"load66_speed100_reach33:66% 负载,100% 速度以及 33% 臂展情况下的测试结果文件夹"
            w2t(msg)

        _, raw_data_files = traversal_files(raw_data_dir, w2t)
        if len(raw_data_files) != 3:
            ok = False
            msg = f"数据目录 {raw_data_dir} 下数据文件个数错误,每个数据目录下有且只能有三个以 .data 为后缀的数据文件"
            w2t(msg)

        for raw_data_file in raw_data_files:
            if not _leaf_name(raw_data_file).endswith(_DATA_SUFFIXES):
                ok = False
                msg = f"数据文件 {raw_data_file} 后缀错误,每个数据目录下有且只能有三个以 .data/csv 为后缀的数据文件"
                w2t(msg)

    if ok:
        w2t("数据目录合规性检查结束,未发现问题......")
    return ok


def now_doing_msg(docs, flag, w2t):
    """Write a timestamped "processing"/"finished" line for a file or directory.

    Args:
        docs: path being processed; treated as a file when ``isfile(docs)``
            is true and as a directory otherwise.
        flag: ``'start'`` or ``'done'``; any other value writes nothing.
        w2t: output callback.
    """
    now = strftime('%Y-%m-%d %H:%M:%S', localtime(time()))
    is_file = isfile(docs)
    if flag == 'start':
        if is_file:
            w2t(f"[{now}] 正在处理文件 {docs} 中的数据......")
        else:
            w2t(f"[{now}] 正在处理目录 {docs} 中的数据......")
    elif flag == 'done':
        if is_file:
            w2t(f"[{now}] 文件 {docs} 数据已处理完毕......")
        else:
            w2t(f"[{now}] 目录 {docs} 数据文件已处理完毕......")


def w2t_local(msg, wait, w2t):
    """Call ``w2t(msg, wait)`` once per second until the global ``stop`` is set.

    Returns immediately when *wait* is 0.
    """
    while stop == 0 and wait != 0:
        sleep(1)
        w2t(msg, wait)


def single_file_process(data_file, wb_result, count, av, rr, axis, vel, trq, w2t, rpm):
    """Process one raw data file into its sheet of the result workbook.

    Args:
        data_file: path of a tab-separated ``.data`` file or a GBK-encoded
            ``.csv`` file whose header is on row index 8.
        wb_result: loaded result workbook.
        count: 1-based position of the file within its directory.
        av, rr, axis, vel, w2t, rpm: passed to find_row_start().
        vel, trq: 1-based column numbers of velocity and torque.

    Raises:
        ValueError: *data_file* has neither a ``.data`` nor a ``.csv`` suffix.
    """
    if data_file.endswith('.data'):
        df = read_csv(data_file, sep='\t')
    elif data_file.endswith('.csv'):
        df = read_csv(data_file, sep=',', encoding='gbk', header=8)
    else:
        raise ValueError(f"unsupported data file suffix: {data_file}")

    # Sorting puts the 'reach…' part before the 'speed…' part regardless of
    # the order used in the directory name.
    conditions = sorted(_parent_name(data_file).split('_')[1:])
    ws_result = wb_result[find_result_sheet_name(conditions, count)]

    row_max, row_start = find_row_start(data_file, df, conditions, av, rr, axis, vel, w2t, rpm)
    copy_data_to_result(df, ws_result, row_max, row_start, vel, trq, rpm)

    ws_result["C2"] = 2
    ws_result["G2"] = 14


def copy_data_to_result(df, ws_result, row_max, row_start, vel, trq, rpm):
    """Copy velocity/torque samples into columns A/B of *ws_result*.

    Rows 2..2000 of columns A and B are cleared first.  At most 400 samples
    starting at *row_start* are copied, beginning at sheet row 2.  Velocity
    is multiplied by *rpm*; torque by 1 when ``rpm == 1`` and by 1000
    otherwise.

    Args:
        df: data frame holding the recording.
        ws_result: target worksheet.
        row_max: last usable row position in *df*.
        row_start: first row position to copy.
        vel, trq: 1-based column numbers of velocity and torque in *df*.
        rpm: velocity scale factor.
    """
    for row in ws_result.iter_rows(min_row=2, min_col=1, max_row=2000, max_col=2):
        for cell in row:
            cell.value = None

    # Cap the copied span at exactly 400 rows.
    row_max = min(row_max, row_start + 399)
    rc = 1 if rpm == 1 else 1000

    target_rows = ws_result.iter_rows(min_row=2, min_col=1,
                                      max_row=row_max - row_start + 2, max_col=2)
    for src, (vel_cell, trq_cell) in zip(range(row_start, row_max + 1), target_rows):
        vel_cell.value = df.iloc[src, vel-1] * rpm
        trq_cell.value = df.iloc[src, trq-1] * rc


def find_result_sheet_name(conditions, count):
    """Build the result sheet name, e.g. ``33%臂展_33%速度_正1``.

    Args:
        conditions: ``['reachNN', 'speedMM']`` (reach first).
        count: 1-based file position appended after ``正``.
    """
    reach = conditions[0].removeprefix('reach')
    speed = conditions[1].removeprefix('speed')
    return f"{reach}%臂展_{speed}%速度_正{count}"


def find_row_start(data_file, df, conditions, av, rr, axis, vel, w2t, rpm):
    """Search backwards for the last row where speed was at its expected maximum.

    The expected maximum is ``av * (speed percentage / 100) * rr / 6``.  The
    search starts at the last row.  Rows whose scaled speed truncates below 1
    are skipped 50 at a time.  Otherwise a short window of rows is averaged;
    when the average is within the threshold of the expected maximum, the
    start row is set 10 rows before it.  If no such window is found a
    warning is written through *w2t*.

    Returns:
        ``(row_max, row_start)``: the last row position of *df* and the row
        to start copying from (never negative).
    """
    ratio = float(conditions[1].removeprefix('speed'))/100
    speed_max = av * ratio * rr / 6
    row_max = row_start = df.index[-1]
    threshold, step = get_threshold_step(data_file, axis)

    while row_start > step+1:
        speed = df.iloc[row_start, vel-1] * rpm
        if int(speed) < 1:
            row_start -= 50
            continue

        window = [df.iloc[i, vel-1] * rpm for i in range(row_start, row_start-step+1, -1)]
        speed_avg = abs(sum(window))/len(window)
        if abs(speed_avg-speed_max) < threshold:
            row_start = row_start - 10
            break
        row_start -= step
    else:
        msg = f"可能是{data_file}这个文件数据采集有问题,比如未采集理论速度值,也有可能是程序步长设定问题,请检查......"
        w2t(msg)

    # Backing off by 10 or 50 rows can overshoot the beginning of the data;
    # a negative position would make iloc wrap around to the end.
    return row_max, max(row_start, 0)


def get_threshold_step(excel_file, axis):
    """Return ``(threshold, step)`` for the backward search of one data file.

    The threshold is 30 when the containing directory name indicates 100 %
    load and 100 % speed and *axis* is 2; otherwise it is 10.  The step is 5
    in both cases.
    """
    # After sorting: [0] is the load part, [2] the speed part.
    conditions = sorted(_parent_name(excel_file).split('_'))
    full_load_and_speed = conditions[0][-3:] == '100' and conditions[2][-3:] == '100'
    threshold = 30 if (full_load_and_speed and axis == 2) else 10
    step = 5
    return threshold, step


def traversal_files(path, w2t):
    """List the direct sub-directories and files of *path* (non-recursive).

    A missing *path* is reported through *w2t* and yields two empty lists.

    Returns:
        ``(dirs, files)``: two lists of paths.
    """
    dirs = []
    files = []
    if not exists(path):
        msg = f'数据文件夹{path}不存在,请确认后重试......'
        w2t(msg)
        return dirs, files

    for item in scandir(path):
        if item.is_dir():
            dirs.append(item.path)
        elif item.is_file():
            files.append(item.path)
    return dirs, files


def main(path, av, rr, rpm, axis, vel, trq, w2t):
    """Validate the working directory and process every result workbook in it.

    Args:
        path: working directory holding result workbooks and data directories.
        av, rr: numeric factors of the expected maximum speed
            (``av * ratio * rr / 6``).
        rpm: velocity scale factor.
        axis: axis number.
        vel, trq: 1-based column numbers of velocity and torque.
        w2t: output callback, called as ``w2t(msg)`` or ``w2t(msg, 1)``.
    """
    time_start = time()
    raw_data_dirs, result_files = traversal_files(path, w2t)
    if not exists(path):
        # Already reported by traversal_files(); nothing to process.
        return
    check_files(raw_data_dirs, result_files, w2t)

    prefixes = {_leaf_name(raw_data_dir).split("_")[0] for raw_data_dir in raw_data_dirs}

    try:
        for result_file in result_files:
            if _leaf_name(result_file).split('_')[0] not in prefixes:
                continue
            now_doing_msg(result_file, 'start', w2t)
            data_process(result_file, raw_data_dirs, av, rr, axis, vel, trq, w2t, rpm)
    except Exception as Err:
        msg = f"出现错误:{Err}\n程序运行错误,请检查配置文件是否准确设定,以及数据文件组织是否正确,也有可能是结果文件损坏,尝试重新复制一份,再运行!"
        w2t(msg)

    w2t("----------------------------------------------------------")
    w2t("全部处理完毕")
    time_total = time() - time_start
    msg = f"数据处理时间:{time_total // 3600:02.0f} h {time_total % 3600 // 60:02.0f} min {time_total % 60:02.0f} s"
    w2t(msg)


def _number(text):
    """Parse a command-line value as int when possible, else as float."""
    try:
        return int(text)
    except ValueError:
        return float(text)


def _console_w2t(msg, wait=0):
    """Console stand-in for the w2t callback.

    NOTE(review): assumes a truthy second argument means "do not end the
    line", which matches how the progress dots are emitted — confirm against
    the real callback.
    """
    print(msg, end='' if wait else '\n', flush=True)


if __name__ == "__main__":
    stop = 0
    # Command-line values arrive as strings and must be converted; the output
    # callback cannot come from argv, so the console writer is used.
    main(path=argv[1], av=_number(argv[2]), rr=_number(argv[3]), rpm=_number(argv[4]),
         axis=int(argv[5]), vel=int(argv[6]), trq=int(argv[7]), w2t=_console_w2t)