6 месяцев назад · 5540ec0ebe
--- a/train_corrector.py
+++ b/train_corrector.py
@@ -1,6 +1,8 @@
 
				 import pandas as pd
			
 
				 import joblib
			
 
				+from apscheduler.schedulers.blocking import BlockingScheduler
			
 
				 from sklearn.ensemble import RandomForestRegressor
			
 
				+from sklearn.model_selection import train_test_split
			
 
				 
			
 
				 # === 配置路径 ===
			
 
				 csv_path = 'C:\\Users\\Administrator\\Desktop\\defrost\\feedback_data.csv'  # 你的csv
			
@@ -12,66 +14,69 @@ feature_columns = [
 
				     "T_air", "T_initial", "T_m", "a", "b", "c"
			
 
				 ]
			
 
				 
			
 
				-# === 1. 读取CSV并预处理 ===
			
 
				-try:
			
 
				-    df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='utf-8')
			
 
				-    print(f"✅ 成功读取CSV文件，共{len(df)}条数据")
			
 
				-except Exception as e:
			
 
				-    print(f"❌ 读取CSV失败: {e}")
			
 
				-    exit(1)
			
 
				-
			
 
				-# 确保字段类型正确（如果这两列存在）
			
 
				-for col in ["material_name", "manufactured_goods"]:
			
 
				-    if col in df.columns:
			
 
				-        df[col] = df[col].astype(str)
			
 
				-
			
 
				-# 计算真实解冻时长（小时）
			
 
				-df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600
			
 
				-
			
 
				-# 检查有没有缺失特征
			
 
				-missing_features = [col for col in feature_columns if col not in df.columns]
			
 
				-if missing_features:
			
 
				-    print(f"❌ 缺少必要特征列: {missing_features}")
			
 
				-    exit(1)
			
 
				-
			
 
				-# === 2. 智能训练模型 ===
			
 
				-X = df[feature_columns]
			
 
				-y = df["t_real_hours"]
			
 
				-
			
 
				-if len(X) >= 10:
			
 
				-    # 数据够多，做train_test_split
			
 
				-    from sklearn.model_selection import train_test_split
			
 
				-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
			
 
				-    print(f"📚 数据量 {len(X)}，已划分训练集和测试集")
			
 
				-else:
			
 
				-    # 数据少，直接全量训练
			
 
				-    X_train, y_train = X, y
			
 
				-    X_test, y_test = None, None
			
 
				-    print(f"⚠️ 数据量太少（{len(X)}条），直接全量训练")
			
 
				-
			
 
				-# 建立随机森林回归模型
			
 
				-model = RandomForestRegressor(n_estimators=100, random_state=42)
			
 
				-model.fit(X_train, y_train)
			
 
				-
			
 
				-# 保存模型
			
 
				-joblib.dump(model, model_save_path)
			
 
				-print(f"✅ 模型训练完成，已保存为 {model_save_path}")
			
 
				-
			
 
				-# === 3. 预测最新一条数据 ===
			
 
				-new_sample = df.tail(1)  # 取最后一行
			
 
				-
			
 
				-X_new = new_sample[feature_columns]
			
 
				-predicted_time = model.predict(X_new)[0]
			
 
				-
			
 
				-# 把预测值写回DataFrame
			
 
				-df.loc[new_sample.index, "predicted_t_real_hours"] = predicted_time
			
 
				-
			
 
				-# === 4. 保存带预测值的CSV ===
			
 
				-try:
			
 
				-    df.to_csv(csv_path, encoding='utf-8', index=False)
			
 
				-    print(f"✅ 最新数据预测完成，已更新到 {csv_path}")
			
 
				-except Exception as e:
			
 
				-    print(f"❌ 保存CSV失败: {e}")
			
 
				-
			
 
				-# === 5. 打印最终预测结果 ===
			
 
				-print(f"\n📊 预测最后一条数据真实解冻时间为：{predicted_time:.2f} 小时")
			
 
				+# Training job run by the scheduler
			
 
				+def train_and_save_model():
			
 
				+    """Retrain the random forest model from the feedback CSV and save it.
			
 
				+
			
 
				+    Reads ``csv_path``, derives the real thawing duration in hours as
			
 
				+    ``t_real - t_formula``, fits a random forest on ``feature_columns``
			
 
				+    and dumps it to ``model_save_path``. Returns early, after printing
			
 
				+    the reason, if the CSV is unreadable or a feature column is missing.
			
 
				+    Scheduling is set up once at module level, not inside this job.
			
 
				+    """
			
 
				+    print("🔄 定时任务开始：重新训练模型...")
			
 
				+
			
 
				+    # === 1. Read the CSV and preprocess ===
			
 
				+    try:
			
 
				+        df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='utf-8')
			
 
				+        print(f"✅ 成功读取CSV文件，共{len(df)}条数据")
			
 
				+    except Exception as e:
			
 
				+        print(f"❌ 读取CSV失败: {e}")
			
 
				+        return
			
 
				+
			
 
				+    # Force the label columns to str (only if they are present)
			
 
				+    for col in ["material_name", "manufactured_goods"]:
			
 
				+        if col in df.columns:
			
 
				+            df[col] = df[col].astype(str)
			
 
				+
			
 
				+    # Real thawing duration in hours
			
 
				+    df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600
			
 
				+
			
 
				+    # Bail out if any required feature column is absent
			
 
				+    missing_features = [col for col in feature_columns if col not in df.columns]
			
 
				+    if missing_features:
			
 
				+        print(f"❌ 缺少必要特征列: {missing_features}")
			
 
				+        return
			
 
				+
			
 
				+    # === 2. Train the model ===
			
 
				+    X = df[feature_columns]
			
 
				+    y = df["t_real_hours"]
			
 
				+
			
 
				+    if len(X) >= 10:
			
 
				+        # Enough rows: hold out 20% as a test set
			
 
				+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
			
 
				+        print(f"📚 数据量 {len(X)}，已划分训练集和测试集")
			
 
				+    else:
			
 
				+        # Too few rows: train on everything
			
 
				+        X_train, y_train = X, y
			
 
				+        X_test, y_test = None, None
			
 
				+        print(f"⚠️ 数据量太少（{len(X)}条），直接全量训练")
			
 
				+
			
 
				+    # Fit the random forest regressor
			
 
				+    model = RandomForestRegressor(n_estimators=100, random_state=42)
			
 
				+    model.fit(X_train, y_train)
			
 
				+
			
 
				+    # Persist the model
			
 
				+    joblib.dump(model, model_save_path)
			
 
				+    print(f"✅ 模型训练完成，已保存为 {model_save_path}")
			
 
				+
			
 
				+
			
 
				+# Build the blocking scheduler that drives periodic retraining
			
 
				+scheduler = BlockingScheduler()
			
 
				+
			
 
				+# Register train_and_save_model to run once every hour
			
 
				+scheduler.add_job(func=train_and_save_model, trigger='interval', hours=1)
			
 
				+
			
 
				+# Announce start-up, then block inside the scheduler loop
			
 
				+print("⏰ 启动定时任务调度器，每小时自动训练模型...")
			
 
				+scheduler.start()