Kaynağa Gözat

加入定时任务 生成pkl模型文件

Administrator 1 ay önce
ebeveyn
işleme
5540ec0ebe
1 değiştirilmiş dosya ile 68 ekleme ve 63 silme
  1. 68 63
      train_corrector.py

+ 68 - 63
train_corrector.py

@@ -1,6 +1,8 @@
 import pandas as pd
 import joblib
+from apscheduler.schedulers.blocking import BlockingScheduler
 from sklearn.ensemble import RandomForestRegressor
+from sklearn.model_selection import train_test_split
 
 # === 配置路径 ===
 csv_path = 'C:\\Users\\Administrator\\Desktop\\defrost\\feedback_data.csv'  # 你的csv
@@ -12,66 +14,69 @@ feature_columns = [
     "T_air", "T_initial", "T_m", "a", "b", "c"
 ]
 
-# === 1. 读取CSV并预处理 ===
-try:
-    df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='utf-8')
-    print(f"✅ 成功读取CSV文件,共{len(df)}条数据")
-except Exception as e:
-    print(f"❌ 读取CSV失败: {e}")
-    exit(1)
-
-# 确保字段类型正确(如果这两列存在)
-for col in ["material_name", "manufactured_goods"]:
-    if col in df.columns:
-        df[col] = df[col].astype(str)
-
-# 计算真实解冻时长(小时)
-df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600
-
-# 检查有没有缺失特征
-missing_features = [col for col in feature_columns if col not in df.columns]
-if missing_features:
-    print(f"❌ 缺少必要特征列: {missing_features}")
-    exit(1)
-
-# === 2. 智能训练模型 ===
-X = df[feature_columns]
-y = df["t_real_hours"]
-
-if len(X) >= 10:
-    # 数据够多,做train_test_split
-    from sklearn.model_selection import train_test_split
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-    print(f"📚 数据量 {len(X)},已划分训练集和测试集")
-else:
-    # 数据少,直接全量训练
-    X_train, y_train = X, y
-    X_test, y_test = None, None
-    print(f"⚠️ 数据量太少({len(X)}条),直接全量训练")
-
-# 建立随机森林回归模型
-model = RandomForestRegressor(n_estimators=100, random_state=42)
-model.fit(X_train, y_train)
-
-# 保存模型
-joblib.dump(model, model_save_path)
-print(f"✅ 模型训练完成,已保存为 {model_save_path}")
-
-# === 3. 预测最新一条数据 ===
-new_sample = df.tail(1)  # 取最后一行
-
-X_new = new_sample[feature_columns]
-predicted_time = model.predict(X_new)[0]
-
-# 把预测值写回DataFrame
-df.loc[new_sample.index, "predicted_t_real_hours"] = predicted_time
-
-# === 4. 保存带预测值的CSV ===
-try:
-    df.to_csv(csv_path, encoding='utf-8', index=False)
-    print(f"✅ 最新数据预测完成,已更新到 {csv_path}")
-except Exception as e:
-    print(f"❌ 保存CSV失败: {e}")
-
-# === 5. 打印最终预测结果 ===
-print(f"\n📊 预测最后一条数据真实解冻时间为:{predicted_time:.2f} 小时")
+# Training routine run by the scheduler; scheduling is set up at module level.
+def train_and_save_model():
+    """Re-train the regression model from the feedback CSV and save it.
+
+    Reads ``csv_path``, derives the target ``t_real_hours`` as the gap between
+    ``t_real`` and ``t_formula`` in hours, fits a RandomForestRegressor on
+    ``feature_columns`` and dumps it to ``model_save_path`` with joblib.
+    Returns None; read errors, an empty file and missing feature columns are
+    reported with print() and the run is skipped.
+    """
+    print("🔄 定时任务开始:重新训练模型...")
+
+    # === 1. Read the CSV and pre-process ===
+    try:
+        df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='utf-8')
+        print(f"✅ 成功读取CSV文件,共{len(df)}条数据")
+    except Exception as e:
+        print(f"❌ 读取CSV失败: {e}")
+        return
+
+    # Nothing to fit on: skip this run instead of letting fit() raise.
+    if df.empty:
+        print("❌ CSV中没有数据,跳过本次训练")
+        return
+
+    # Force the two name columns to str when they are present.
+    for col in ["material_name", "manufactured_goods"]:
+        if col in df.columns:
+            df[col] = df[col].astype(str)
+
+    # Actual thawing duration in hours (the training target).
+    df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600
+
+    # Abort when any required feature column is absent.
+    missing_features = [col for col in feature_columns if col not in df.columns]
+    if missing_features:
+        print(f"❌ 缺少必要特征列: {missing_features}")
+        return
+
+    # === 2. Train the model ===
+    X = df[feature_columns]
+    y = df["t_real_hours"]
+
+    if len(X) >= 10:
+        # Enough rows: fit on an 80% split. The held-out 20% is not used here.
+        X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)
+        print(f"📚 数据量 {len(X)},已划分训练集和测试集")
+    else:
+        # Too few rows to split: fit on everything.
+        X_train, y_train = X, y
+        print(f"⚠️ 数据量太少({len(X)}条),直接全量训练")
+
+    model = RandomForestRegressor(n_estimators=100, random_state=42)
+    model.fit(X_train, y_train)
+    joblib.dump(model, model_save_path)
+    print(f"✅ 模型训练完成,已保存为 {model_save_path}")
+
+# Module-level scheduler that drives the periodic re-training.
+# NOTE(review): an 'interval' trigger presumably fires first one interval after
+# start(), so no model is produced during the first hour — confirm intended.
+scheduler = BlockingScheduler()
+scheduler.add_job(train_and_save_model, trigger='interval', hours=1)
+
+# Announce start-up, then hand control to the scheduler (start() blocks).
+print("⏰ 启动定时任务调度器,每小时自动训练模型...")
+scheduler.start()