Coverage for src/pycse/sklearn/surface_response.py: 0.00%
101 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-23 16:23 -0400
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-23 16:23 -0400
1from sklearn.preprocessing import MinMaxScaler
2from sklearn.preprocessing import PolynomialFeatures
3from pycse.sklearn.lr_uq import LinearRegressionUQ
4from sklearn.pipeline import Pipeline
5import matplotlib.pyplot as plt
7from pyDOE3 import fullfact, ff2n, pbdesign, gsd, bbdesign, ccdesign, lhs
8import numpy as np
9import pandas as pd
10import tabulate
13class SurfaceResponse(Pipeline):
14 """A class for a Surface Response design of experiment."""
16 def __init__(
17 self, inputs=None, outputs=None, bounds=None, design="bbdesign", model=None, **kwargs
18 ):
19 """inputs : list of strings, name of each factor
21 outputs : list of strings, name of each response
23 bounds : 2D array, Each row is [xmin, xmax] for a component.
24 This assumes that [-1, 1] map to the bounds.
26 design: string, one of the designs in pyDOE3
27 fullfact, ff2n, pbdesign, gsd, bbdesign, ccdesign, lhs
28 except fracfact. That one is confusing and I don't know how you use it.
30 model : an sklearn model, defaults to scaled, order-2 polynomial
31 features with linear regression.
33 kwargs are passed to the pyDOE3 model
35 Builds a linear regression model. The polynomial features are
36 automatically generated up to the specified order.
38 """
39 self.inputs = inputs
40 self.outputs = outputs
41 self.bounds = np.array(bounds)
42 if design == "fullfact":
43 self._design = fullfact(**kwargs)
44 elif design == "ff2n":
45 self._design = ff2n(len(inputs))
46 elif design == "pbdesign":
47 self._design = pbdesign(len(inputs))
48 elif design == "gsd":
49 self._design = gsd(**kwargs)
50 elif design == "bbdesign":
51 self._design = bbdesign(len(inputs), **kwargs)
52 elif design == "ccdesign":
53 self._design == ccdesign(len(inputs), **kwargs)
54 elif design == "lhs":
55 self._design = lhs(len(inputs), **kwargs)
56 else:
57 raise Exception(f"Unsupported design option: {design}")
59 self.model = model
61 if model is None:
62 self.default = True
63 scaler = MinMaxScaler(feature_range=(-1, 1)).set_output(transform="pandas")
64 super().__init__(
65 steps=[
66 ("minmax", scaler),
67 ("poly", PolynomialFeatures(2)),
68 ("surface response", LinearRegressionUQ()),
69 ]
70 )
71 else:
72 self.default = False
73 super().__init__(steps=[("usermodel", model)])
75 def design(self, shuffle=True):
76 """Creates a design dataframe.
78 shuffle: Boolean, if True shuffle the results.
80 Returns
81 -------
82 a data frame
84 """
85 design = self._design
86 nrows, ncols = design.shape
88 # with lrange we assume that (-1, 1) maps to (xmin, xmax)
89 # here a=-1, b=1
90 # Xsc = a + (x - xmin) * (b - a) / (xmax - xmin)
92 # solving for x
93 # x = (Xsc - a) * (xmax - xmin) / (b - a) + xmin
95 a, b = -1, 1
97 # lrange is column wise min, max
98 if self.bounds is not None:
99 mins = self.bounds[:, 0]
100 maxs = self.bounds[:, 1]
102 design = (design - a) * (maxs - mins) / (b - a) + mins
104 df = pd.DataFrame(data=design, columns=self.inputs)
106 if shuffle:
107 df = df.sample(frac=1)
109 self.input = df
110 return df
112 def set_output(self, data):
113 """Set output to data.
115 data : list or array of results. Each row should correspond to the same
116 row in the input.
118 Returns
119 -------
120 The dataframe containing the outputs.
122 """
123 index = self.input.index
124 df = pd.DataFrame(data, index=index, columns=self.outputs)
125 self.output = df
126 return self.output
128 def fit(self, X=None, y=None):
129 """Fit the model to the data."""
130 X, y = self.input, self.output
131 return super().fit(X, y)
133 def score(self, X=None, y=None):
134 """Compute the R2 score."""
135 X, y = self.input, self.output
136 return super().score(X, y)
138 def parity(self):
139 """Creates parity plot between true values and predicted values."""
140 X, y = self.input, self.output
141 pred = self.predict(X)
142 plt.scatter(y, pred)
143 plt.plot(np.linspace(y.min(), y.max()), np.linspace(y.min(), y.max()))
145 plt.xlabel("True Value")
146 plt.ylabel("Predicted Value")
147 plt.title(f"R2 = {self.score():1.3f}")
149 return plt.gcf()
151 def _sigfig(self, x, n=3):
152 """Round X to N significant figures."""
153 # Adapted from https://gist.github.com/ttamg/3f65227fd580b3d8dc8ba91e01507280
154 return np.round(x, -int(np.floor(np.log10(np.abs(x)))) + (n - 1))
156 def summary(self):
157 """Return a summary string of the model.
159 This includes overall metrics like R2, MAE, RMSE, and for each feature
160 the parameter, confidence interval, standard error, and significance. If
161 significance is 0, the parameter is not significant; it means the
162 confidence interval contains 0. If it is 1 than it is signficant.
164 Returns
165 -------
166 the summary string.
167 """
168 X, y = self.input, self.output
170 s = [f"{len(X)} data points"]
171 yp = self.predict(X)
172 errs = y - yp
174 if self.default:
175 features = self["poly"].get_feature_names_out()
177 pars = self["surface response"].coefs_
178 pars_cint = self["surface response"].pars_cint
179 pars_se = self["surface response"].pars_se
181 nrows, ncols = pars.shape
183 mae = [self._sigfig(x) for x in (np.abs(errs).mean())]
184 rmse = [self._sigfig(x) for x in (errs**2).mean()]
186 s += [f" score: {self.score(X, y)}"]
187 s += [
188 f" mae = {(mae)}",
189 "",
190 f" rmse = {rmse}",
191 "",
192 ]
194 # this seems to be some corner case where the dimensions are not
195 # right for ncols=1.
196 if ncols == 1:
197 pars_cint = [pars_cint]
199 for i in range(ncols): # these are the outputs
200 data = []
201 s += [f"Output_{i} = {y.columns[i]}"]
202 for j, name in enumerate(features):
203 # These indexes are confusing.
204 # i is the ith output
205 # j is the jth feature
206 # cint has a shape of (noutputs, nfeatures, 2)
207 # se has a shape of (nfeatures, noutputs)
208 data += [
209 [
210 f"{name}_{i}",
211 pars[j][i],
212 pars_cint[i][j][0],
213 pars_cint[i][j][1],
214 pars_se[j][i],
215 np.sign(pars_cint[i][j][0] * pars_cint[i][j][1]) > 0,
216 ]
217 ]
218 s += [
219 tabulate.tabulate(
220 data,
221 headers=["var", "value", "ci_lower", "ci_upper", "se", "significant"],
222 tablefmt="orgtbl",
223 )
224 ]
225 s += [""]
226 else:
227 s += ["User defined model:", repr(self["usermodel"])]
229 mae = [self._sigfig(x) for x in (np.abs(errs).mean())]
230 rmse = [self._sigfig(x) for x in (errs**2).mean()]
232 s += [f" score: {self.score(X, y)}"]
233 s += [
234 f" mae = {(mae)}",
235 "",
236 f" rmse = {rmse}",
237 "",
238 ]
240 return "\n".join(s)