Administrator пре 1 месец
родитељ
комит
c236c4f9dd
1 измењених фајлова са 33 додато и 50 уклоњено
  1. 33 50
      train_corrector.py

+ 33 - 50
train_corrector.py

@@ -3,70 +3,53 @@ import joblib
 from sklearn.ensemble import RandomForestRegressor
 from sklearn.model_selection import train_test_split
 
-# === 步骤1:读取CSV并预处理 ===
+# === 配置路径 ===
 csv_path = 'C:\\Users\\Administrator\\Desktop\\defrost\\feedback_data.csv'
+model_save_path = "defrost_time_corrector.pkl"
+
+# === 特征列定义 ===
+feature_columns = [
+    "w", "rho_coal", "rho_ice", "C_coal", "C_ice", "L", "k_coal", "k_ice", "h",
+    "T_air", "T_initial", "T_m", "a", "b", "c"
+]
+
+# === 1. 读取CSV并预处理 ===
 df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='gbk')
 
-# 确保类型一致
+# 确保字段类型正确
 df["material_name"] = df["material_name"].astype(str)
 df["manufactured_goods"] = df["manufactured_goods"].astype(str)
 
-# 计算真实解冻时长(单位:小时)
+# 计算真实解冻时长(小时)
 df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600
 
-# 特征列(不包括物料名称和产品名称)
-feature_columns = [
-    "w", "rho_coal", "rho_ice", "C_coal", "C_ice", "L", "k_coal", "k_ice", "h",
-    "T_air", "T_initial", "T_m", "a", "b", "c"
-]
-
-# 模型输入和标签
-X = df[feature_columns].copy()
+# === 2. Train the model (fit on the 80% training split of the historical data; the 20% test split is held out) ===
+X = df[feature_columns]
 y = df["t_real_hours"]
 
-# === 步骤2:训练模型 ===
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
 model = RandomForestRegressor(n_estimators=100, random_state=42)
 model.fit(X_train, y_train)
 
-# === 步骤3:保存模型 ===
-joblib.dump(model, "defrost_time_corrector.pkl")
-print("模型训练完成并已保存为 defrost_time_corrector.pkl")
-
-# === 步骤4:测试一个新样本并判断是否为相同样本类型 ===
-new_sample_info = {
-    "material_name": "国产动力煤",
-    "manufactured_goods": "龙家堡洗混煤-5206",
-    "w": 12,
-    "rho_coal": 3000,
-    "rho_ice": 917,
-    "C_coal": 800,
-    "C_ice": 2100,
-    "L": 334000,
-    "k_coal": 20,
-    "k_ice": 2.2,
-    "h": 300,
-    "T_air": 90,
-    "T_initial": -20,
-    "T_m": 0,
-    "a": 13,
-    "b": 2.72,
-    "c": 1.6
-}
-
-# 构造 DataFrame
-new_sample = pd.DataFrame([new_sample_info])
-
-# 一致性判断
-is_known = ((df["material_name"] == new_sample_info["material_name"]) &
-            (df["manufactured_goods"] == new_sample_info["manufactured_goods"])).any()
-
-# 只传入特征列用于模型预测
+# 保存模型
+joblib.dump(model, model_save_path)
+print(f"✅ 模型训练完成,已保存为 {model_save_path}")
+
+# === 3. 用最新数据预测(比如最后一条或多条) ===
+
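+# Predict for the most recently appended row. NOTE: only the first prediction ([0]) is kept below and written to every selected row, so using more than one row requires changing the prediction and write-back lines as well.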
+new_sample = df.tail(1)  # 取最后一行,也可以是 tail(n) 最后n行
+
 X_new = new_sample[feature_columns]
 predicted_time = model.predict(X_new)[0]
 
-print(f"\n📊 预测真实解冻时间: {predicted_time:.2f} 小时")
-if is_known:
-    print("该样本与历史数据中存在相同物料和制造品,可以认为是同一类样本。")
-else:
-    print("该样本是新的物料或产品组合,可能存在偏差,请注意验证。")
+# 把预测值写回DataFrame
+df.loc[new_sample.index, "predicted_t_real_hours"] = predicted_time
+
+# === 4. 保存带预测值的CSV ===
+df.to_csv(csv_path, encoding='gbk', index=False)
+print(f"✅ 最新数据预测完成,已更新到 {csv_path}")
+
+print(f"\n📊 预测最后一条数据真实解冻时间为:{predicted_time:.2f} 小时")
+