# train_corrector.py
# Periodically retrains the defrost-time corrector model from the feedback CSV.
  1. import pandas as pd
  2. import joblib
  3. from apscheduler.schedulers.blocking import BlockingScheduler
  4. from sklearn.ensemble import RandomForestRegressor
  5. from sklearn.model_selection import train_test_split
  6. # === 配置路径 ===
  7. csv_path = 'C:\\Users\\Administrator\\Desktop\\defrost\\feedback_data.csv' # 你的csv
  8. model_save_path = "defrost_time_corrector.pkl" # 模型保存路径
  9. # === 特征列定义 ===
  10. feature_columns = [
  11. "w", "rho_coal", "rho_ice", "C_coal", "C_ice", "L", "k_coal", "k_ice", "h",
  12. "T_air", "T_initial", "T_m", "a", "b", "c"
  13. ]
  14. # 定义定时任务的训练函数
  15. def train_and_save_model():
  16. print("🔄 定时任务开始:重新训练模型...")
  17. # === 1. 读取CSV并预处理 ===
  18. try:
  19. df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='utf-8')
  20. print(f"✅ 成功读取CSV文件,共{len(df)}条数据")
  21. except Exception as e:
  22. print(f"❌ 读取CSV失败: {e}")
  23. return
  24. # 确保字段类型正确(如果这两列存在)
  25. for col in ["material_name", "manufactured_goods"]:
  26. if col in df.columns:
  27. df[col] = df[col].astype(str)
  28. # 计算真实解冻时长(小时)
  29. df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600
  30. # 检查有没有缺失特征
  31. missing_features = [col for col in feature_columns if col not in df.columns]
  32. if missing_features:
  33. print(f"❌ 缺少必要特征列: {missing_features}")
  34. return
  35. # === 2. 智能训练模型 ===
  36. X = df[feature_columns]
  37. y = df["t_real_hours"]
  38. if len(X) >= 10:
  39. # 数据够多,做train_test_split
  40. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
  41. print(f"📚 数据量 {len(X)},已划分训练集和测试集")
  42. else:
  43. # 数据少,直接全量训练
  44. X_train, y_train = X, y
  45. X_test, y_test = None, None
  46. print(f"⚠️ 数据量太少({len(X)}条),直接全量训练")
  47. # 建立随机森林回归模型
  48. model = RandomForestRegressor(n_estimators=100, random_state=42)
  49. model.fit(X_train, y_train)
  50. # 保存模型
  51. joblib.dump(model, model_save_path)
  52. print(f"✅ 模型训练完成,已保存为 {model_save_path}")
  53. # 设置定时任务调度器
  54. scheduler = BlockingScheduler()
  55. # 每小时执行一次 train_and_save_model 函数
  56. scheduler.add_job(train_and_save_model, 'interval', hours=1)
  57. # 启动定时任务调度器
  58. print("⏰ 启动定时任务调度器,每小时自动训练模型...")
  59. # 设置定时任务调度器
  60. scheduler = BlockingScheduler()
  61. # 每小时执行一次 train_and_save_model 函数
  62. scheduler.add_job(train_and_save_model, 'interval', hours=1)
  63. # 启动定时任务调度器
  64. print("⏰ 启动定时任务调度器,每小时自动训练模型...")
  65. scheduler.start()