# train_corrector.py - trains and applies the defrost-time corrector model.

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
  5. # === 配置路径 ===
  6. csv_path = 'C:\\Users\\Administrator\\Desktop\\defrost\\feedback_data.csv'
  7. model_save_path = "defrost_time_corrector.pkl"
  8. # === 特征列定义 ===
  9. feature_columns = [
  10. "w", "rho_coal", "rho_ice", "C_coal", "C_ice", "L", "k_coal", "k_ice", "h",
  11. "T_air", "T_initial", "T_m", "a", "b", "c"
  12. ]
  13. # === 1. 读取CSV并预处理 ===
  14. df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='gbk')
  15. # 确保字段类型正确
  16. df["material_name"] = df["material_name"].astype(str)
  17. df["manufactured_goods"] = df["manufactured_goods"].astype(str)
  18. # 计算真实解冻时长(小时)
  19. df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600
  20. # === 2. 训练模型(用已有所有历史数据) ===
  21. X = df[feature_columns]
  22. y = df["t_real_hours"]
  23. X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
  24. model = RandomForestRegressor(n_estimators=100, random_state=42)
  25. model.fit(X_train, y_train)
  26. # 保存模型
  27. joblib.dump(model, model_save_path)
  28. print(f"✅ 模型训练完成,已保存为 {model_save_path}")
  29. # === 3. 用最新数据预测(比如最后一条或多条) ===
  30. # 假设你要预测最后新增的一条数据(如果多条可以改)
  31. new_sample = df.tail(1) # 取最后一行,也可以是 tail(n) 最后n行
  32. X_new = new_sample[feature_columns]
  33. predicted_time = model.predict(X_new)[0]
  34. # 把预测值写回DataFrame
  35. df.loc[new_sample.index, "predicted_t_real_hours"] = predicted_time
  36. # === 4. 保存带预测值的CSV ===
  37. df.to_csv(csv_path, encoding='gbk', index=False)
  38. print(f"✅ 最新数据预测完成,已更新到 {csv_path}")
  39. print(f"\n📊 预测最后一条数据真实解冻时间为:{predicted_time:.2f} 小时")