# AIO/codes/analysis/wavelogger.py
# 2025-03-27 19:05:02 +08:00
#
# 162 lines
# 7.5 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas
import csv
import openpyxl
import chardet
import time
from PySide6.QtCore import Signal, QThread
from codes.common import clibs
class WaveloggerDataProcess(QThread):
    """QThread subclass that summarises wavelogger CSV recordings into an Excel workbook.

    Every ``.csv`` file found under ``dir_path`` gets its own worksheet in
    ``result.xlsx``, which is written next to the first data file. The signal
    value is always read from the third column (index 2) of the parsed CSV.
    """

    # Two-string signal; forwarded to clibs.logger and clibs.traversal_files.
    output = Signal(str, str)

    def __init__(self, dir_path, /):
        """Store the directory to scan.

        Args:
            dir_path: Directory handed to ``clibs.traversal_files`` (positional-only).
        """
        super().__init__()
        self.dir_path = dir_path
        # Index of this task's slot in ``clibs.running`` (see ``processing``).
        self.idx = 3

    def logger(self, level, module, content, color="black", error="", flag="both"):
        """Forward a message to ``clibs.logger`` and abort on errors.

        Args:
            level: Log level name; compared case-insensitively.
            module: Name of the emitting module.
            content: Message text.
            color: Display colour passed through to ``clibs.logger``.
            error: Error label used as the prefix of the raised exception.
            flag: Kept for signature compatibility only. The value passed in
                is ignored: "cursor" is used for DEBUG, "both" otherwise.

        Raises:
            Exception: Always, when ``level`` is "ERROR" (after logging).
        """
        is_debug = level.upper() == "DEBUG"
        flag = "cursor" if is_debug else "both"
        clibs.logger(level, module, content, color, flag, signal=self.output)
        if level.upper() == "ERROR":
            raise Exception(f"{error} | {content}")

    def find_point(self, bof, step, margin, threshold, pos, data_file, flag, df, row):
        """Walk through ``df`` from ``row`` until the signal leaves the given band.

        Args:
            bof: "backward" to walk towards row 0; any other value walks
                towards the end of the data.
            step: Number of rows moved per iteration.
            margin: Rows at the tail of ``df`` that are never inspected.
            threshold: Value the signal is compared against.
            pos: Label of the call site, only used in the error message.
            data_file: File name, only used in the error message.
            flag: "gt" keeps walking while the value is above ``threshold``,
                "lt" while it is below it.
            df: Parsed data; column index 2 holds the signal.
            row: Row index to start from.

        Returns:
            The row one ``step`` past the first sample that is no longer in
            the band. When a "forward" walk runs out of rows, ``row + margin``
            is returned so the caller's loop in ``single_file_proc`` ends.
            ``None`` for an unknown ``flag``, or when the walk runs out of
            rows and ``bof`` is neither "backward" nor "forward".

        Raises:
            Exception: Via ``self.logger`` when a "backward" walk runs out of
                rows without finding a transition.
        """
        row_origin = len(df) - margin + 1
        if flag not in ("gt", "lt"):
            return None

        backward = bof == "backward"
        offset = -step if backward else step
        while 0 < row < row_origin:
            value = float(df.iloc[row, 2])
            in_band = value > threshold if flag == "gt" else value < threshold
            if not in_band:
                # Transition found: report the row one step beyond it.
                return row + offset
            row += offset

        # The walk left the valid row range without seeing a transition.
        if backward:
            self.logger(
                "ERROR",
                "wavelogger",
                f"find_point-{flag}: [{pos}] 在 {data_file} 中,无法正确识别数据,需要确认...",
                "red",
                "DataError",
            )
        elif bof == "forward":
            return row + margin  # to end while loop in function `single_file_proc`
        return None

    def get_cycle_info(self, data_file, step, margin, threshold):
        """Measure the low/high durations of one cycle near the end of a file.

        The search runs backwards from the tail of the data:
          1. Whatever level the last samples are at, discard that segment and
             find the start of the opposite level.
          2. From there keep walking backwards to the point where the level
             flips again (the middle point).
          3. From the middle point keep walking backwards to the next flip.
             This yields the low interval, the high interval and the length
             of one full cycle.
        Between the end point and the middle point the signal is low; between
        the middle point and the start point it is high.

        Args:
            data_file: Path of the CSV file.
            step: Row step used by ``find_point``.
            margin: Number of tail rows skipped before the search starts.
            threshold: Level separating "low" from "high".

        Returns:
            Tuple ``(low, high, cycle, df)``: the three durations in rows and
            the parsed DataFrame.
        """
        # Sniff the encoding from the first 1000 bytes only.
        with open(data_file, "rb") as f:
            raw_data = f.read(1000)
        encoding = chardet.detect(raw_data)['encoding']

        # The second field of the first CSV row gives the header line number.
        # The context manager closes the handle the original code left open.
        with open(data_file, encoding=encoding) as text_file:
            begin = int(next(csv.reader(text_file))[1])

        df = pandas.read_csv(data_file, sep=",", encoding=encoding, skip_blank_lines=False, header=begin - 1, on_bad_lines="skip")

        def search_back(pos, flag, start_row):
            return self.find_point("backward", step, margin, threshold, pos, data_file, flag, df, start_row)

        row = len(df) - margin
        if float(df.iloc[row, 2]) < threshold:
            # The recording ends on a low segment: skip it first.
            row = search_back("a1", "lt", row)
        row = search_back("a2", "gt", row)
        row = search_back("a3", "lt", row)
        row_end = search_back("a4", "gt", row)
        row_middle = search_back("a5", "lt", row_end)
        row_start = search_back("a6", "gt", row_middle)
        return row_end - row_middle, row_middle - row_start, row_end - row_start, df

    def initialization(self):
        """Collect the data files and verify that each one is a ``.csv`` file.

        Returns:
            The list of data files reported by ``clibs.traversal_files``.

        Raises:
            Exception: Via ``self.logger`` for the first file whose name does
                not end in ".csv" (case-insensitive).
        """
        _, data_files = clibs.traversal_files(self.dir_path, self.output)
        for data_file in data_files:
            if data_file.lower().endswith(".csv"):
                continue
            self.logger("ERROR", "wavelogger", f"init: {data_file} 文件后缀错误,只允许 .csv 文件,需要确认!", "red", "FileTypeError")
        return data_files

    def preparation(self, data_file, step, margin, threshold, wb):
        """Create the worksheet for ``data_file`` and measure its cycle timing.

        The sheet is named after the file: the last "/"-separated path
        component, cut at its first ".".

        Returns:
            Tuple ``(ws, df, low, high, cycle)``.
        """
        base_name = data_file.split("/")[-1]
        shtname = base_name.split(".")[0]
        ws = wb.create_sheet(shtname)
        low, high, cycle, df = self.get_cycle_info(data_file, step, margin, threshold)
        return ws, df, low, high, cycle

    def single_file_proc(self, ws, data_file, step, threshold, margin, data_length, df, cycle):
        """Compute one value per low segment and write the results to ``ws``.

        For every low segment, ``data_length`` rows centred between the end of
        the preceding high segment and the end of the low segment are taken,
        and ``mean + 3 * std`` of them is recorded. Values are grouped into
        rounds; each round becomes one worksheet column.

        Args:
            ws: Worksheet receiving the headers and values.
            data_file: File name, used in log messages.
            step: Row step used by ``find_point``.
            threshold: Level separating "low" from "high".
            margin: Number of tail rows that are not processed.
            data_length: Number of rows in each evaluated window.
            df: Parsed data; column index 2 holds the signal.
            cycle: Length of one cycle in rows, from ``get_cycle_info``.
        """
        row, row_lt, row_gt, count, count_i = 1, 1, 1, 1, 1
        data = {}
        row_max = len(df) - margin
        while row < row_max:
            data.setdefault(count, [])
            if float(df.iloc[row, 2]) < threshold:
                row_lt = self.find_point("forward", step, margin, threshold, "c" + str(row), data_file, "lt", df, row)
                # Centre a data_length-row window inside the low segment.
                start = int(row_gt + (row_lt - row_gt - data_length) / 2)
                end = start + data_length
                window = df.iloc[start:end, 2].astype(float)
                value = window.mean() + 3 * window.std()
                if value > 1:
                    # NOTE(review): the limit 1 is hard-coded; its unit is not visible here.
                    self.logger("WARNING", "wavelogger", f"{data_file} 文件第 {count} 轮 第 {count_i} 个数据可能有问题,需人工手动确认,确认有问题可删除,无问题则保留", "purple")
                # The value is kept even after a warning; the message leaves
                # the keep/delete decision to the operator.
                data[count].append(value)
                count_i += 1
            else:
                row_gt = self.find_point("forward", step, margin, threshold, "c" + str(row), data_file, "gt", df, row)
                # A high segment longer than two cycles starts a new round.
                if row_gt - row_lt > cycle * 2:
                    count += 1
                    count_i = 1
            row = max(row_gt, row_lt)

        # Fixed header grid: columns 2-9 in row 1 and rows 2-9 in column 1.
        for i in range(2, 10):
            ws.cell(row=1, column=i).value = f"{i-1}次测试"
            ws.cell(row=i, column=1).value = f"{i-1}次精度变化"

        # Round N goes to column N + 1, values listed from row 2 downwards.
        for round_no in sorted(data):
            for sheet_row, value in enumerate(data[round_no], start=2):
                ws.cell(row=sheet_row, column=round_no + 1).value = float(value)

    def execution(self, data_files):
        """Process every data file and save ``result.xlsx`` beside the first one.

        Args:
            data_files: "/"-separated paths of the CSV files to process.

        Raises:
            IndexError: If ``data_files`` is empty, because the output
                directory is taken from ``data_files[0]``.
        """
        self.logger("INFO", "wavelogger", "正在处理中......", "blue")
        wb = openpyxl.Workbook()
        step, margin, data_length, threshold = 5, 50, 50, 5
        for data_file in data_files:
            # The low/high durations are returned but only ``cycle`` is needed here.
            ws, df, _low, _high, cycle = self.preparation(data_file, step, margin, threshold, wb)
            self.single_file_proc(ws, data_file, step, threshold, margin, data_length, df, cycle)

        wd = "/".join(data_files[0].split("/")[:-1])
        filename = wd + "/result.xlsx"
        wb.save(filename)
        wb.close()

    def processing(self):
        """Run the whole pipeline and log the elapsed time.

        Sets ``clibs.running[self.idx]`` to 1 before any work starts.
        """
        time_start = time.time()
        clibs.running[self.idx] = 1
        data_files = self.initialization()
        self.execution(data_files)
        self.logger("INFO", "wavelogger", "-" * 60 + "<br>全部处理完毕<br>", "purple")
        time_total = time.time() - time_start
        msg = f"数据处理时间:{time_total // 3600:02.0f} h {time_total % 3600 // 60:02.0f} m {time_total % 60:02.0f} s\n"
        self.logger("INFO", "wavelogger", msg)