|
@@ -3,70 +3,53 @@ import joblib
|
|
|
from sklearn.ensemble import RandomForestRegressor
|
|
|
from sklearn.model_selection import train_test_split
|
|
|
|
|
|
-# === 步骤1:读取CSV并预处理 ===
|
|
|
+# === Path configuration (input feedback CSV, output model file) ===
|
|
|
csv_path = 'C:\\Users\\Administrator\\Desktop\\defrost\\feedback_data.csv'
|
|
|
+model_save_path = "defrost_time_corrector.pkl"
|
|
|
+
|
|
|
+# === Feature column definitions (the columns selected from df as model input X) ===
|
|
|
+feature_columns = [
|
|
|
+ "w", "rho_coal", "rho_ice", "C_coal", "C_ice", "L", "k_coal", "k_ice", "h",
|
|
|
+ "T_air", "T_initial", "T_m", "a", "b", "c"
|
|
|
+]
|
|
|
+
|
|
|
+# === 1. Read the CSV (GBK-encoded; t_formula / t_real parsed as dates) and preprocess ===
|
|
|
df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='gbk')
|
|
|
|
|
|
-# 确保类型一致
|
|
|
+# Make sure the two name columns are stored as strings
|
|
|
df["material_name"] = df["material_name"].astype(str)
|
|
|
df["manufactured_goods"] = df["manufactured_goods"].astype(str)
|
|
|
|
|
|
-# 计算真实解冻时长(单位:小时)
|
|
|
+# Real defrost duration in hours: (t_real - t_formula) converted from seconds
|
|
|
df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600
|
|
|
|
|
|
-# 特征列(不包括物料名称和产品名称)
|
|
|
-feature_columns = [
|
|
|
- "w", "rho_coal", "rho_ice", "C_coal", "C_ice", "L", "k_coal", "k_ice", "h",
|
|
|
- "T_air", "T_initial", "T_m", "a", "b", "c"
|
|
|
-]
|
|
|
-
|
|
|
-# 模型输入和标签
|
|
|
-X = df[feature_columns].copy()
|
|
|
+# === 2. Train the model on historical data (NOTE(review): fit() below only sees the 80% train split; X_test / y_test are not used in this hunk) ===
|
|
|
+X = df[feature_columns]
|
|
|
y = df["t_real_hours"]
|
|
|
|
|
|
-# === 步骤2:训练模型 ===
|
|
|
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
|
+
|
|
|
model = RandomForestRegressor(n_estimators=100, random_state=42)
|
|
|
model.fit(X_train, y_train)
|
|
|
|
|
|
-# === 步骤3:保存模型 ===
|
|
|
-joblib.dump(model, "defrost_time_corrector.pkl")
|
|
|
-print("模型训练完成并已保存为 defrost_time_corrector.pkl")
|
|
|
-
|
|
|
-# === 步骤4:测试一个新样本并判断是否为相同样本类型 ===
|
|
|
-new_sample_info = {
|
|
|
- "material_name": "国产动力煤",
|
|
|
- "manufactured_goods": "龙家堡洗混煤-5206",
|
|
|
- "w": 12,
|
|
|
- "rho_coal": 3000,
|
|
|
- "rho_ice": 917,
|
|
|
- "C_coal": 800,
|
|
|
- "C_ice": 2100,
|
|
|
- "L": 334000,
|
|
|
- "k_coal": 20,
|
|
|
- "k_ice": 2.2,
|
|
|
- "h": 300,
|
|
|
- "T_air": 90,
|
|
|
- "T_initial": -20,
|
|
|
- "T_m": 0,
|
|
|
- "a": 13,
|
|
|
- "b": 2.72,
|
|
|
- "c": 1.6
|
|
|
-}
|
|
|
-
|
|
|
-# 构造 DataFrame
|
|
|
-new_sample = pd.DataFrame([new_sample_info])
|
|
|
-
|
|
|
-# 一致性判断
|
|
|
-is_known = ((df["material_name"] == new_sample_info["material_name"]) &
|
|
|
- (df["manufactured_goods"] == new_sample_info["manufactured_goods"])).any()
|
|
|
-
|
|
|
-# 只传入特征列用于模型预测
|
|
|
+# Persist the trained model to model_save_path
|
|
|
+joblib.dump(model, model_save_path)
|
|
|
+print(f"✅ 模型训练完成,已保存为 {model_save_path}")
|
|
|
+
|
|
|
+# === 3. Predict for the most recent data (e.g. the last row, or the last few rows) ===
|
|
|
+
|
|
|
+# Assumes the row to predict is the last one in the CSV. NOTE(review): this row is also part of X / y above, so it may have been in the training split; if it has no t_real yet, its label is NaN — confirm fit() tolerates that
|
|
|
+new_sample = df.tail(1) # take the last row; tail(n) would take the last n rows, but NOTE(review): only the first prediction ([0]) is kept below
|
|
|
+
|
|
|
X_new = new_sample[feature_columns]
|
|
|
predicted_time = model.predict(X_new)[0]
|
|
|
|
|
|
-print(f"\n📊 预测真实解冻时间: {predicted_time:.2f} 小时")
|
|
|
-if is_known:
|
|
|
- print("该样本与历史数据中存在相同物料和制造品,可以认为是同一类样本。")
|
|
|
-else:
|
|
|
- print("该样本是新的物料或产品组合,可能存在偏差,请注意验证。")
|
|
|
+# Write the predicted value back into the DataFrame, on the row(s) selected as new_sample
|
|
|
+df.loc[new_sample.index, "predicted_t_real_hours"] = predicted_time
|
|
|
+
|
|
|
+# === 4. Save the CSV with the prediction (NOTE(review): overwrites the input file at csv_path and also writes the derived t_real_hours column) ===
|
|
|
+df.to_csv(csv_path, encoding='gbk', index=False)
|
|
|
+print(f"✅ 最新数据预测完成,已更新到 {csv_path}")
|
|
|
+
|
|
|
+print(f"\n📊 预测最后一条数据真实解冻时间为:{predicted_time:.2f} 小时")
|
|
|
+
|