"""Train a random-forest regressor for real defrost duration and score a sample.

The script runs top to bottom:

1. Load the feedback CSV and derive the regression target ``t_real_hours``
   as the elapsed time between the ``t_formula`` and ``t_real`` columns.
2. Fit a ``RandomForestRegressor`` on an 80/20 train/test split and report
   the mean absolute error on the held-out part.
3. Persist the fitted model to ``defrost_time_corrector.pkl``.
4. Predict the duration for one hard-coded sample and report whether its
   (material_name, manufactured_goods) pair already occurs in the CSV.
"""

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# === Step 1: read the CSV and preprocess ===
csv_path = 'C:\\Users\\Administrator\\Desktop\\defrost\\feedback_data.csv'
df = pd.read_csv(csv_path, parse_dates=["t_formula", "t_real"], encoding='gbk')

# Force both identifier columns to str so the equality check against the
# new sample's string values in step 4 compares like with like.
df["material_name"] = df["material_name"].astype(str)
df["manufactured_goods"] = df["manufactured_goods"].astype(str)

# Real defrost duration in hours: elapsed time from t_formula to t_real.
df["t_real_hours"] = (df["t_real"] - df["t_formula"]).dt.total_seconds() / 3600

# Feature columns fed to the model (material and product names are excluded).
feature_columns = [
    "w", "rho_coal", "rho_ice", "C_coal", "C_ice", "L",
    "k_coal", "k_ice", "h", "T_air", "T_initial", "T_m",
    "a", "b", "c",
]

# Model inputs and label.
X = df[feature_columns].copy()
y = df["t_real_hours"]

# === Step 2: train the model ===
X_train, X_test, y_test_placeholder, y_test = (None, None, None, None)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
del y_test_placeholder

model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Use the held-out split that was previously computed but never consulted,
# so the saved model comes with at least one out-of-sample error figure.
test_mae = (y_test - model.predict(X_test)).abs().mean()
print(f"测试集平均绝对误差 (MAE): {test_mae:.2f} 小时")

# === Step 3: save the model ===
joblib.dump(model, "defrost_time_corrector.pkl")
print("模型训练完成并已保存为 defrost_time_corrector.pkl")

# === Step 4: predict one new sample and check whether its type is known ===
new_sample_info = {
    "material_name": "国产动力煤",
    "manufactured_goods": "龙家堡洗混煤-5206",
    "w": 12,
    "rho_coal": 3000,
    "rho_ice": 917,
    "C_coal": 800,
    "C_ice": 2100,
    "L": 334000,
    "k_coal": 20,
    "k_ice": 2.2,
    "h": 300,
    "T_air": 90,
    "T_initial": -20,
    "T_m": 0,
    "a": 13,
    "b": 2.72,
    "c": 1.6,
}

# Single-row DataFrame built from the sample dict.
new_sample = pd.DataFrame([new_sample_info])

# True when any CSV row has the same material AND the same product.
is_known = (
    (df["material_name"] == new_sample_info["material_name"])
    & (df["manufactured_goods"] == new_sample_info["manufactured_goods"])
).any()

# Only the feature columns go to the model; selecting by feature_columns also
# puts them in the same order the model was fitted with.
X_new = new_sample[feature_columns]
predicted_time = model.predict(X_new)[0]

print(f"\n📊 预测真实解冻时间: {predicted_time:.2f} 小时")
if is_known:
    print("该样本与历史数据中存在相同物料和制造品,可以认为是同一类样本。")
else:
    print("该样本是新的物料或产品组合,可能存在偏差,请注意验证。")