# AIO/codes/analysis/wavelogger.py

import pandas
import csv
import openpyxl
import chardet
import time
from PySide6.QtCore import Signal, QThread
from codes.common import clibs


class WaveloggerDataProcess(QThread):
    output = Signal(str, str)

    def __init__(self, dir_path, /):
        super().__init__()
        self.dir_path = dir_path
        self.idx = 3

    def logger(self, level, module, content, color="black", error="", flag="both"):
        """Forward a message to ``clibs.logger`` and abort on ERROR.

        Args:
            level: Severity name; compared case-insensitively.
            module: Tag passed through to ``clibs.logger``.
            content: Message text.
            color: Color name passed through to ``clibs.logger``.
            error: Prefix used in the exception raised for ERROR messages.
            flag: Accepted for interface compatibility only. The value given
                by the caller is always replaced: "cursor" for DEBUG
                messages, "both" for everything else.

        Raises:
            Exception: When ``level`` is ERROR, after the message was logged.
        """
        severity = level.upper()
        flag = "both"
        if severity == "DEBUG":
            flag = "cursor"
        clibs.logger(level, module, content, color, flag, signal=self.output)
        if severity != "ERROR":
            return
        raise Exception(f"{error} | {content}")

    def find_point(self, bof, step, margin, threshold, pos, data_file, flag, df, row):
        # bof: backward or forward
        # pos: used for debug
        # flag: greater than or lower than
        row_target = None
        row_origin = len(df) - margin + 1
        if flag == "gt":
            while 0 < row < row_origin:
                value = float(df.iloc[row, 2])
                if value > threshold:
                    row = row - step if bof == "backward" else row + step
                    continue
                else:
                    row_target = row - step if bof == "backward" else row + step
                    break
            else:
                if bof == "backward":
                    self.logger("ERROR", "wavelogger", f"find_point-gt: [{pos}] 在 {data_file} 中，无法正确识别数据，需要确认...", "red", "DataError")
                elif bof == "forward":
                    row_target = row + margin  # to end while loop in function `single_file_proc`
        elif flag == "lt":
            while 0 < row < row_origin:
                value = float(df.iloc[row, 2])
                if value < threshold:
                    row = row - step if bof == "backward" else row + step
                    continue
                else:
                    row_target = row - step if bof == "backward" else row + step
                    break
            else:
                if bof == "backward":
                    self.logger("ERROR", "wavelogger", f"find_point-lt: [{pos}] 在 {data_file} 中，无法正确识别数据，需要确认...", "red", "DataError")
                elif bof == "forward":
                    row_target = row + margin  # to end while loop in function `single_file_proc`
        return row_target

    def get_cycle_info(self, data_file, step, margin, threshold):
        """Load a CSV log and measure one low/high cycle near its end.

        Scanning strategy (all scans run backward from the end of the data):
          1. Read from the end; whichever side of the threshold the trailing
             data is on, discard it and find where the opposite-valued
             region begins.
          2. From that point keep going backward to find the middle point,
             where the value flips again.
          3. From the middle point keep going backward to the next flip.
             This yields the row spans of the low and high regions and the
             length of one full cycle.

        end -> middle is the low region, middle -> start is the high region.

        Args:
            data_file: Path of the CSV file to read.
            step: Row stride passed to ``find_point``.
            margin: Rows at the end of the frame that are ignored.
            threshold: Value separating the low and high regions.

        Returns:
            Tuple ``(low, high, cycle, df)`` where the first three are row
            counts (``row_end - row_middle``, ``row_middle - row_start``,
            ``row_end - row_start``) and ``df`` is the loaded data frame.

        Raises:
            Exception: Via ``find_point`` when a backward scan cannot find a
                crossing.
        """
        # Guess the encoding from the first 1000 bytes only.
        with open(data_file, "rb") as f:
            raw_data = f.read(1000)
        encoding = chardet.detect(raw_data)["encoding"]

        # The second field of the first line is used as the 1-based header
        # line number. Read it inside a context manager so the handle is
        # closed instead of being left to the garbage collector.
        with open(data_file, encoding=encoding, newline="") as f:
            begin = int(next(csv.reader(f))[1])

        df = pandas.read_csv(data_file, sep=",", encoding=encoding, skip_blank_lines=False, header=begin - 1, on_bad_lines="skip")
        row = len(df) - margin
        if float(df.iloc[row, 2]) < threshold:
            # Trailing data is low: skip back over it first.
            row = self.find_point("backward", step, margin, threshold, "a1", data_file, "lt", df, row)

        # Skip one complete high/low pair, then measure the next cycle.
        _row = self.find_point("backward", step, margin, threshold, "a2", data_file, "gt", df, row)
        _row = self.find_point("backward", step, margin, threshold, "a3", data_file, "lt", df, _row)
        row_end = self.find_point("backward", step, margin, threshold, "a4", data_file, "gt", df, _row)
        row_middle = self.find_point("backward", step, margin, threshold, "a5", data_file, "lt", df, row_end)
        row_start = self.find_point("backward", step, margin, threshold, "a6", data_file, "gt", df, row_middle)
        return row_end - row_middle, row_middle - row_start, row_end - row_start, df

    def initialization(self):
        """Collect the data files under ``self.dir_path`` and check their suffix.

        Returns:
            The second element returned by ``clibs.traversal_files``, i.e. the
            list of data file paths.

        Raises:
            Exception: Via ``self.logger`` for the first file whose name does
                not end with ".csv" (case-insensitive).
        """
        _, data_files = clibs.traversal_files(self.dir_path, self.output)

        for candidate in data_files:
            if candidate.lower().endswith(".csv"):
                continue
            self.logger("ERROR", "wavelogger", f"init: {candidate} 文件后缀错误，只允许 .csv 文件，需要确认！", "red", "FileTypeError")

        return data_files

    def preparation(self, data_file, step, margin, threshold, wb):
        shtname = data_file.split("/")[-1].split(".")[0]
        ws = wb.create_sheet(shtname)
        low, high, cycle, df = self.get_cycle_info(data_file, step, margin, threshold)

        return ws, df, low, high, cycle

    def single_file_proc(self, ws, data_file, step, threshold, margin, data_length, df, cycle):
        row, row_lt, row_gt, count, count_i, data = 1, 1, 1, 1, 1, {}
        row_max = len(df) - margin
        while row < row_max:
            if count not in data.keys():
                data[count] = []

            value = float(df.iloc[row, 2])
            if value < threshold:
                row_lt = self.find_point("forward", step, margin, threshold, "c"+str(row), data_file, "lt", df, row)
                start = int(row_gt + (row_lt - row_gt - data_length) / 2)
                end = start + data_length
                value = df.iloc[start:end, 2].astype(float).mean() + 3 * df.iloc[start:end, 2].astype(float).std()
                if value > 1:
                    msg = f"\n"
                    self.logger("WARNING", "wavelogger", f"{data_file} 文件第 {count} 轮 第 {count_i} 个数据可能有问题，需人工手动确认，确认有问题可删除，无问题则保留", "purple")

                data[count].append(value)
                count_i += 1
            else:
                row_gt = self.find_point("forward", step, margin, threshold, "c"+str(row), data_file, "gt", df, row)
                if row_gt - row_lt > cycle * 2:
                    count += 1
                    count_i = 1
            row = max(row_gt, row_lt)
        for i in range(2, 10):
            ws.cell(row=1, column=i).value = f"第{i-1}次测试"
            ws.cell(row=i, column=1).value = f"第{i-1}次精度变化"

        for i in sorted(data.keys()):
            row, column = 2, i + 1
            for value in data[i]:
                ws.cell(row=row, column=column).value = float(value)
                row += 1

    def execution(self, data_files):
        """Process every data file into its own sheet and save ``result.xlsx``.

        The workbook is written next to the first data file, i.e. into the
        part of ``data_files[0]`` that precedes its last "/".

        Args:
            data_files: Paths of the CSV files to process, using "/" separators.
        """
        self.logger("INFO", "wavelogger", "正在处理中......", "blue")
        wb = openpyxl.Workbook()

        # Fixed analysis parameters shared by all files.
        step = 5
        margin = 50
        data_length = 50
        threshold = 5

        for data_file in data_files:
            ws, df, _low, _high, cycle = self.preparation(data_file, step, margin, threshold, wb)
            self.single_file_proc(ws, data_file, step, threshold, margin, data_length, df, cycle)

        target_dir = data_files[0].rpartition("/")[0]
        wb.save(target_dir + "/result.xlsx")
        wb.close()

    def processing(self):
        """Run the whole pipeline and log the elapsed wall-clock time.

        Sets ``clibs.running[self.idx]`` to 1, collects and validates the
        input files, processes them, then logs a completion banner followed
        by the elapsed time formatted as hours, minutes and seconds.
        """
        started_at = time.time()
        clibs.running[self.idx] = 1

        self.execution(self.initialization())

        self.logger("INFO", "wavelogger", "-" * 60 + "<br>全部处理完毕<br>", "purple")
        time_total = time.time() - started_at
        hours = time_total // 3600
        minutes = time_total % 3600 // 60
        seconds = time_total % 60
        msg = f"数据处理时间：{hours:02.0f} h {minutes:02.0f} m {seconds:02.0f} s\n"
        self.logger("INFO", "wavelogger", msg)