Coverage for src/m5py/linreg_utils.py: 16%
78 statements
from abc import abstractmethod, ABCMeta

import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model._base import LinearModel
from sklearn.preprocessing import StandardScaler
from sklearn.utils.extmath import safe_sparse_dot


def linreg_model_to_text(model, feature_names=None, target_name=None,
                         precision=3, line_breaks=False):
    """
    Converts a linear regression model to a text representation.

    :param model: a fitted linear model exposing `coef_` and `intercept_`
    :param feature_names: optional names for the input features; when None,
        features are printed as "X[i]"
    :param target_name: optional name of the target; when provided, the text
        starts with "<target_name> = "
    :param precision: number of digits used when printing coefficients
    :param line_breaks: if True, each term in the sum shows on a different
        line
    :return: the text representation of the model
    """
    bits = []

    # Template for numbers: we want scientific notation with a given precision
    nb_tpl = "%%0.%se" % precision

    # Handle multi-dimensional y (its second dim must be of size 1, though)
    if len(model.coef_.shape) > 1:
        assert len(model.coef_.shape) == 2
        assert model.coef_.shape[0] == 1
        coefs = np.ravel(model.coef_)  # convert to 1D
        assert len(model.intercept_) == 1
        intercept = model.intercept_.item()  # extract scalar
    else:
        coefs = model.coef_  # a 1D array
        intercept = model.intercept_  # a scalar already

    # First, all the coef * feature terms
    for i, c in enumerate(coefs):
        var_name = ("X[%s]" % i) if feature_names is None else feature_names[i]

        if i == 0:
            # first term: no sign prefix
            if np.abs(c) < 1:
                # use scientific notation
                product_text = (nb_tpl + " * %s") % (c, var_name)
            else:
                # use standard notation with precision
                c = np.round(c, precision)
                product_text = "%s * %s" % (c, var_name)
        else:
            # all the other terms: the sign should appear
            lb = '\n' if line_breaks else ""
            coef_abs = np.abs(c)
            coef_sign = '+' if np.sign(c) > 0 else '-'
            if coef_abs < 1:
                # use scientific notation
                product_text = (("%s%s " + nb_tpl + " * %s")
                                % (lb, coef_sign, coef_abs, var_name))
            else:
                # use standard notation with precision
                coef_abs = np.round(coef_abs, precision)
                product_text = ("%s%s %s * %s"
                                % (lb, coef_sign, coef_abs, var_name))

        bits.append(product_text)

    # Finally, the intercept
    if len(bits) == 0:
        # the intercept is the only term in the sum
        if np.abs(intercept) < 1:
            # use scientific notation only for small numbers (otherwise 12
            # would read 1.2e1 ... not friendly)
            constant_text = nb_tpl % intercept
        else:
            # use standard notation with precision
            i = np.round(intercept, precision)
            constant_text = "%s" % i
    else:
        # there are other terms in the sum: the sign should appear
        lb = '\n' if line_breaks else ""
        coef_abs = np.abs(intercept)
        coef_sign = '+' if np.sign(intercept) > 0 else '-'
        if coef_abs < 1:
            # use scientific notation
            constant_text = ("%s%s " + nb_tpl) % (lb, coef_sign, coef_abs)
        else:
            # use standard notation with precision
            coef_abs = np.round(coef_abs, precision)
            constant_text = "%s%s %s" % (lb, coef_sign, coef_abs)

    bits.append(constant_text)

    txt = " ".join(bits)
    if target_name is not None:
        txt = target_name + " = " + txt

    return txt
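

# Illustrative sketch (not part of the original module): a minimal round trip
# through `linreg_model_to_text` on an exactly-linear toy problem. The data
# and the `_example_*` name are hypothetical.
def _example_linreg_model_to_text():
    X = np.array([[1.0, 2.0], [2.0, 1.0], [3.0, 5.0], [4.0, 3.0]])
    y = 2.0 * X[:, 0] - 0.25 * X[:, 1] + 1.0  # exactly linear toy target

    reg = LinearRegression().fit(X, y)
    # |coef| >= 1 prints in standard notation, |coef| < 1 in scientific
    # notation, so this should read close to:
    #     y = 2.0 * a - 2.500e-01 * b + 1.0
    return linreg_model_to_text(reg, feature_names=["a", "b"],
                                target_name="y")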


class DeNormalizableMixIn(metaclass=ABCMeta):
    """
    An abstract class to be implemented by models that are able to
    de-normalize themselves.
    """
    __slots__ = ()

    @abstractmethod
    def denormalize(self,
                    x_scaler: StandardScaler = None,
                    y_scaler: StandardScaler = None
                    ):
        """
        De-normalizes the model, knowing that it was fit with the given
        x_scaler and y_scaler.
        """


class DeNormalizableLinearModelMixIn(DeNormalizableMixIn, LinearModel):
    """
    A mix-in class adding 'de-normalization' capability to a linear model.
    """
    def denormalize(self,
                    x_scaler: StandardScaler = None,
                    y_scaler: StandardScaler = None
                    ):
        """
        De-normalizes the linear regression model.

        Before this function is executed, the model operates on normalized
        inputs and outputs:

            (y - y_mean)/y_scale = self.coef_.T <dot> (x - x_mean)/x_scale
                                   + self.intercept_

        Expanding the right-hand side:

            (y - y_mean)/y_scale = (self.coef_/x_scale).T <dot> x
                + (self.intercept_ - (self.coef_/x_scale).T <dot> x_mean)

        that is,

            (y - y_mean)/y_scale = new_coef.T <dot> x + new_intercept

        where

         * new_coef = self.coef_ / x_scale
         * new_intercept = self.intercept_ - new_coef.T <dot> x_mean

        Then, going back to the original y:

            y = (new_coef * y_scale).T <dot> x
                + (new_intercept * y_scale + y_mean)

        :param x_scaler: the StandardScaler used to normalize x during fit
        :param y_scaler: the StandardScaler used to normalize y during fit
        :return:
        """
        # First, save the old (normalized) coefficients
        self.normalized_coef_ = self.coef_
        self.normalized_intercept_ = self.intercept_

        # de-normalize the coefficients to take into account the x normalization
        if x_scaler is not None:
            new_coef = self.coef_ / x_scaler.scale_
            new_intercept = (
                self.intercept_ -
                safe_sparse_dot(x_scaler.mean_, new_coef.T,
                                dense_output=True)
            )

            self.coef_ = new_coef
            self.intercept_ = new_intercept

        # de-normalize them further to take into account the y normalization
        if y_scaler is not None:
            new_coef = self.coef_ * y_scaler.scale_
            # recent scikit-learn requires a 2D array in inverse_transform
            new_intercept = y_scaler.inverse_transform(
                np.atleast_2d(self.intercept_)
            ).ravel()
            if np.isscalar(self.intercept_):
                new_intercept = new_intercept[0]
            self.coef_ = new_coef
            self.intercept_ = new_intercept
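

# Hedged numerical check (not part of the original module) of the algebra in
# the docstring above: fit in normalized space, de-normalize, and verify that
# predictions on raw inputs are unchanged. The `_check_*` and `_CheckModel`
# names are hypothetical.
def _check_denormalize_math():
    class _CheckModel(LinearRegression, DeNormalizableLinearModelMixIn):
        pass

    rng = np.random.RandomState(0)
    X = rng.uniform(1.0, 10.0, size=(50, 3))
    y = X @ np.array([2.0, -1.0, 0.5]) + 3.0

    x_scaler = StandardScaler().fit(X)
    y_scaler = StandardScaler().fit(y.reshape(-1, 1))
    Xn = x_scaler.transform(X)
    yn = y_scaler.transform(y.reshape(-1, 1)).ravel()

    model = _CheckModel().fit(Xn, yn)
    pred_normalized = y_scaler.inverse_transform(
        model.predict(Xn).reshape(-1, 1)
    ).ravel()

    model.denormalize(x_scaler=x_scaler, y_scaler=y_scaler)
    pred_denormalized = model.predict(X)  # raw x, raw y: no scalers needed

    assert np.allclose(pred_normalized, pred_denormalized)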


class DeNormalizableLinearRegression(LinearRegression,
                                     DeNormalizableLinearModelMixIn):
    """
    A de-normalizable linear regression. The old (normalized) coefficients
    are kept and exposed through the `feature_importances_` property.
    """
    @property
    def feature_importances_(self):
        if hasattr(self, '_feature_importances_'):
            return self._feature_importances_
        else:
            return self.coef_

    def denormalize(self,
                    x_scaler: StandardScaler = None,
                    y_scaler: StandardScaler = None):
        """
        De-normalizes the model, saving a copy of the old (normalized)
        coefficients in `self._feature_importances_`.

        :param x_scaler: the StandardScaler used to normalize x during fit
        :param y_scaler: the StandardScaler used to normalize y during fit
        :return:
        """
        self._feature_importances_ = self.coef_
        super(DeNormalizableLinearRegression, self).denormalize(x_scaler,
                                                                y_scaler)


# The same mix-in should make any other scikit-learn linear model
# de-normalizable too, e.g.:
# class DeNormalizableLasso(Lasso, DeNormalizableLinearModelMixIn):
#     pass
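

# End-to-end sketch (not part of the original module): train a
# DeNormalizableLinearRegression on standardized data, de-normalize it, and
# print it in the original units. The data and names are illustrative.
def _example_denormalizable_regression():
    X = np.array([[10.0, 200.0], [20.0, 100.0], [30.0, 500.0], [40.0, 300.0]])
    y = 2.0 * X[:, 0] + 0.01 * X[:, 1] + 5.0

    x_scaler = StandardScaler().fit(X)
    y_scaler = StandardScaler().fit(y.reshape(-1, 1))

    reg = DeNormalizableLinearRegression()
    reg.fit(x_scaler.transform(X),
            y_scaler.transform(y.reshape(-1, 1)).ravel())

    reg.denormalize(x_scaler=x_scaler, y_scaler=y_scaler)
    # `feature_importances_` still exposes the coefficients learned on the
    # standardized data, while coef_/intercept_ are now in original units;
    # the text should read close to:
    #     y = 2.0 * x1 + 1.000e-02 * x2 + 5.0
    return linreg_model_to_text(reg, feature_names=["x1", "x2"],
                                target_name="y")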