Coverage for src/pycse/sklearn/surface_response.py: 0.00%

101 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-23 16:23 -0400

1from sklearn.preprocessing import MinMaxScaler 

2from sklearn.preprocessing import PolynomialFeatures 

3from pycse.sklearn.lr_uq import LinearRegressionUQ 

4from sklearn.pipeline import Pipeline 

5import matplotlib.pyplot as plt 

6 

7from pyDOE3 import fullfact, ff2n, pbdesign, gsd, bbdesign, ccdesign, lhs 

8import numpy as np 

9import pandas as pd 

10import tabulate 

11 

12 

class SurfaceResponse(Pipeline):
    """A class for a Surface Response design of experiment.

    Wraps an sklearn Pipeline: builds a coded design matrix from pyDOE3,
    maps it onto physical bounds, and fits a response-surface model
    (by default scaled order-2 polynomial features with a linear
    regression that provides uncertainty estimates).
    """

    def __init__(
        self, inputs=None, outputs=None, bounds=None, design="bbdesign", model=None, **kwargs
    ):
        """inputs : list of strings, name of each factor

        outputs : list of strings, name of each response

        bounds : 2D array, Each row is [xmin, xmax] for a component.
        This assumes that [-1, 1] map to the bounds. May be None, in
        which case the raw (coded) design values are used as-is.

        design: string, one of the designs in pyDOE3
        fullfact, ff2n, pbdesign, gsd, bbdesign, ccdesign, lhs
        except fracfact. That one is confusing and I don't know how you use it.

        model : an sklearn model, defaults to scaled, order-2 polynomial
        features with linear regression.

        kwargs are passed to the pyDOE3 design function.

        Builds a linear regression model. The polynomial features are
        automatically generated up to the specified order.

        Raises
        ------
        Exception if design is not one of the supported design names.
        """
        self.inputs = inputs
        self.outputs = outputs
        # np.array(None) is a 0-d object array that is *not* None, which
        # would defeat the `self.bounds is not None` guard in design()
        # and then crash on self.bounds[:, 0]. Only convert real bounds.
        self.bounds = np.array(bounds) if bounds is not None else None

        # fullfact and gsd take their configuration entirely from kwargs;
        # the other designs are sized by the number of input factors.
        if design == "fullfact":
            self._design = fullfact(**kwargs)
        elif design == "ff2n":
            self._design = ff2n(len(inputs))
        elif design == "pbdesign":
            self._design = pbdesign(len(inputs))
        elif design == "gsd":
            self._design = gsd(**kwargs)
        elif design == "bbdesign":
            self._design = bbdesign(len(inputs), **kwargs)
        elif design == "ccdesign":
            # BUG FIX: the original used `==` (a no-op comparison) instead
            # of `=`, so self._design was never assigned for ccdesign.
            self._design = ccdesign(len(inputs), **kwargs)
        elif design == "lhs":
            self._design = lhs(len(inputs), **kwargs)
        else:
            raise Exception(f"Unsupported design option: {design}")

        self.model = model

        if model is None:
            # self.default marks whether we use the built-in pipeline;
            # summary() branches on it to decide what it can report.
            self.default = True
            scaler = MinMaxScaler(feature_range=(-1, 1)).set_output(transform="pandas")
            super().__init__(
                steps=[
                    ("minmax", scaler),
                    ("poly", PolynomialFeatures(2)),
                    ("surface response", LinearRegressionUQ()),
                ]
            )
        else:
            self.default = False
            super().__init__(steps=[("usermodel", model)])

    def design(self, shuffle=True, seed=None):
        """Creates a design dataframe in physical (unscaled) units.

        shuffle: Boolean, if True shuffle the rows (randomizes run order).
        seed: optional int, random state for reproducible shuffling
        (backward-compatible addition; default None keeps the old
        nondeterministic behavior).

        Returns
        -------
        a data frame with one column per input factor. The frame is also
        stored on self.input for later use by fit/score/summary.
        """
        design = self._design

        # The coded design lives on [a, b] = [-1, 1]. Map it linearly to
        # the physical bounds by inverting the min-max scaling:
        #   Xsc = a + (x - xmin) * (b - a) / (xmax - xmin)
        #   x   = (Xsc - a) * (xmax - xmin) / (b - a) + xmin
        a, b = -1, 1
        if self.bounds is not None:
            # bounds is column-wise [min, max] per factor
            mins = self.bounds[:, 0]
            maxs = self.bounds[:, 1]
            design = (design - a) * (maxs - mins) / (b - a) + mins

        df = pd.DataFrame(data=design, columns=self.inputs)

        if shuffle:
            df = df.sample(frac=1, random_state=seed)

        self.input = df
        return df

    def set_output(self, data):
        """Set output to data.

        data : list or array of results. Each row should correspond to the
        same row in the input (the shuffled design from design()).

        Returns
        -------
        The dataframe containing the outputs.
        """
        # Reuse the (possibly shuffled) design index so inputs and outputs
        # stay aligned row-for-row.
        index = self.input.index
        df = pd.DataFrame(data, index=index, columns=self.outputs)
        self.output = df
        return self.output

    def fit(self, X=None, y=None):
        """Fit the model to the stored design (self.input, self.output).

        X, y are accepted for sklearn API compatibility but ignored.
        """
        X, y = self.input, self.output
        return super().fit(X, y)

    def score(self, X=None, y=None):
        """Compute the R2 score on the stored design.

        X, y are accepted for sklearn API compatibility but ignored.
        """
        X, y = self.input, self.output
        return super().score(X, y)

    def parity(self):
        """Creates parity plot between true values and predicted values.

        Returns the current matplotlib figure.
        """
        X, y = self.input, self.output
        pred = self.predict(X)
        plt.scatter(y, pred)
        # Diagonal reference line from the data's min to max.
        plt.plot(np.linspace(y.min(), y.max()), np.linspace(y.min(), y.max()))

        plt.xlabel("True Value")
        plt.ylabel("Predicted Value")
        plt.title(f"R2 = {self.score():1.3f}")

        return plt.gcf()

    def _sigfig(self, x, n=3):
        """Round X to N significant figures."""
        # Adapted from https://gist.github.com/ttamg/3f65227fd580b3d8dc8ba91e01507280
        # Guard: log10(0) is undefined; zero is already "rounded".
        if x == 0:
            return x
        return np.round(x, -int(np.floor(np.log10(np.abs(x)))) + (n - 1))

    def summary(self):
        """Return a summary string of the model.

        This includes overall metrics like R2, MAE, RMSE, and for the
        default model, each feature's parameter, confidence interval,
        standard error, and significance. If significance is False, the
        confidence interval contains 0 and the parameter is not
        significant; if it is True the parameter is significant.

        Returns
        -------
        the summary string.
        """
        X, y = self.input, self.output

        s = [f"{len(X)} data points"]
        yp = self.predict(X)
        errs = y - yp

        # Per-output error metrics rounded to 3 significant figures.
        # These were previously duplicated verbatim in both branches.
        mae = [self._sigfig(v) for v in np.abs(errs).mean()]
        # BUG FIX: the original reported the mean *squared* error under
        # the label "rmse"; take the square root so the label is honest.
        rmse = [self._sigfig(v) for v in np.sqrt((errs**2).mean())]

        metrics = [
            f" score: {self.score(X, y)}",
            f" mae = {(mae)}",
            "",
            f" rmse = {rmse}",
            "",
        ]

        if self.default:
            features = self["poly"].get_feature_names_out()

            pars = self["surface response"].coefs_
            pars_cint = self["surface response"].pars_cint
            pars_se = self["surface response"].pars_se

            nrows, ncols = pars.shape

            s += metrics

            # Corner case: with one output the cint array loses its
            # leading dimension, so wrap it to keep the indexing uniform.
            if ncols == 1:
                pars_cint = [pars_cint]

            for i in range(ncols):  # these are the outputs
                data = []
                s += [f"Output_{i} = {y.columns[i]}"]
                for j, name in enumerate(features):
                    # i is the ith output, j is the jth feature.
                    # cint has shape (noutputs, nfeatures, 2);
                    # se has shape (nfeatures, noutputs).
                    lo, hi = pars_cint[i][j][0], pars_cint[i][j][1]
                    data += [
                        [
                            f"{name}_{i}",
                            pars[j][i],
                            lo,
                            hi,
                            pars_se[j][i],
                            # CI endpoints with the same sign exclude 0,
                            # i.e. the parameter is significant.
                            np.sign(lo * hi) > 0,
                        ]
                    ]
                s += [
                    tabulate.tabulate(
                        data,
                        headers=["var", "value", "ci_lower", "ci_upper", "se", "significant"],
                        tablefmt="orgtbl",
                    )
                ]
                s += [""]
        else:
            s += ["User defined model:", repr(self["usermodel"])]
            s += metrics

        return "\n".join(s)