# src/m5py/linreg_utils.py

from abc import abstractmethod, ABCMeta

import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model._base import LinearModel
from sklearn.preprocessing import StandardScaler
from sklearn.utils.extmath import safe_sparse_dot


def linreg_model_to_text(model, feature_names=None, target_name=None,
                         precision=3, line_breaks=False):
    """
    Converts a linear regression model to a text representation.

    :param model: a fitted linear model exposing `coef_` and `intercept_`,
        such as `sklearn.linear_model.LinearRegression`
    :param feature_names: an optional sequence of feature names, used instead
        of the default "X[i]" in the text
    :param target_name: an optional name for the target; if provided, the
        expression is prefixed with "<target_name> = "
    :param precision: the number of digits used to format each coefficient
    :param line_breaks: if True, each term in the sum shows in a different line
    :return: a string representation of the linear model
    """

    bits = []

    # Template for numbers: we want scientific notation with a given precision
    nb_tpl = "%%0.%se" % precision

    # Handle multi-dimensional y (its second dim must be size 1, though)
    if len(model.coef_.shape) > 1:
        assert model.coef_.shape[0] == 1
        assert len(model.coef_.shape) == 2
        coefs = np.ravel(model.coef_)  # convert to 1D
        assert len(model.intercept_) == 1
        intercept = model.intercept_.item()  # extract scalar
    else:
        coefs = model.coef_  # a 1D array
        intercept = model.intercept_  # a scalar already

    # First, all the coef * driver terms
    for i, c in enumerate(coefs):
        var_name = ("X[%s]" % i) if feature_names is None else feature_names[i]

        if i == 0:
            # first term: no sign needed
            if np.abs(c) < 1:
                # use scientific notation
                product_text = (nb_tpl + " * %s") % (c, var_name)
            else:
                # use standard notation with the given precision
                c = np.round(c, precision)
                product_text = "%s * %s" % (c, var_name)
        else:
            # all the other terms: the sign should appear
            lb = '\n' if line_breaks else ""
            coef_abs = np.abs(c)
            coef_sign = '+' if np.sign(c) > 0 else '-'
            if coef_abs < 1:
                # use scientific notation
                product_text = (("%s%s " + nb_tpl + " * %s")
                                % (lb, coef_sign, coef_abs, var_name))
            else:
                # use standard notation with the given precision
                coef_abs = np.round(coef_abs, precision)
                product_text = ("%s%s %s * %s"
                                % (lb, coef_sign, coef_abs, var_name))

        bits.append(product_text)

    # Finally, the intercept
    if len(bits) == 0:
        # the intercept is the only term in the sum
        if np.abs(intercept) < 1:
            # use scientific notation only for small numbers (otherwise 12
            # would read 1.2e1 ... not friendly)
            constant_text = nb_tpl % intercept
        else:
            # use standard notation with the given precision
            i = np.round(intercept, precision)
            constant_text = "%s" % i
    else:
        # there are other terms in the sum: the sign should appear
        lb = '\n' if line_breaks else ""
        coef_abs = np.abs(intercept)
        coef_sign = '+' if np.sign(intercept) > 0 else '-'
        if coef_abs < 1:
            # use scientific notation
            constant_text = ("%s%s " + nb_tpl) % (lb, coef_sign, coef_abs)
        else:
            # use standard notation with the given precision
            coef_abs = np.round(coef_abs, precision)
            constant_text = "%s%s %s" % (lb, coef_sign, coef_abs)

    bits.append(constant_text)

    txt = " ".join(bits)
    if target_name is not None:
        txt = target_name + " = " + txt

    return txt
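
# A minimal usage sketch for `linreg_model_to_text` (an illustration, not part
# of the original module). The data and names below are made up, and the exact
# rendering of each coefficient depends on the fitted values and `precision`:
#
#     >>> import numpy as np
#     >>> from sklearn.linear_model import LinearRegression
#     >>> X = np.array([[0., 1.], [1., 2.], [2., 4.]])
#     >>> y = 3 * X[:, 0] + 0.5 * X[:, 1] + 10
#     >>> reg = LinearRegression().fit(X, y)
#     >>> linreg_model_to_text(reg, feature_names=["a", "b"], target_name="y")
#     'y = 3.0 * a + 5.000e-01 * b + 10.0'
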

class DeNormalizableMixIn(metaclass=ABCMeta):
    """
    An abstract base class to be implemented by models that are able to
    de-normalize themselves.
    """
    __slots__ = ()

    @abstractmethod
    def denormalize(self,
                    x_scaler: StandardScaler = None,
                    y_scaler: StandardScaler = None
                    ):
        """
        De-normalizes the model, knowing that it was fit with the given
        x_scaler and y_scaler.
        """


class DeNormalizableLinearModelMixIn(DeNormalizableMixIn, LinearModel):
    """
    A mix-in class to add a 'denormalization' capability to a linear model.
    """
    def denormalize(self,
                    x_scaler: StandardScaler = None,
                    y_scaler: StandardScaler = None
                    ):
        """
        De-normalizes the linear regression model.

        Before this function is executed,

            (y - y_mean)/y_scale = self.coef_.T <dot> (x - x_mean)/x_scale
                                   + self.intercept_

        so

            (y - y_mean)/y_scale = (self.coef_/x_scale).T <dot> x
                + (self.intercept_ - (self.coef_/x_scale).T <dot> x_mean)

        that is,

            (y - y_mean)/y_scale = new_coef.T <dot> x + new_intercept

        where

         * new_coef = self.coef_ / x_scale
         * new_intercept = self.intercept_ - new_coef.T <dot> x_mean

        Then, going back to y:

            y = (new_coef * y_scale).T <dot> x
                + (new_intercept * y_scale + y_mean)

        :param x_scaler: the StandardScaler that was used to scale x before
            fitting, if any
        :param y_scaler: the StandardScaler that was used to scale y before
            fitting, if any
        :return:
        """

        # First, save the normalized coefficients
        self.normalized_coef_ = self.coef_
        self.normalized_intercept_ = self.intercept_

        # de-normalize the coefficients to take the x normalization into
        # account
        if x_scaler is not None:
            new_coef = self.coef_ / x_scaler.scale_
            new_intercept = (
                self.intercept_ -
                safe_sparse_dot(x_scaler.mean_, new_coef.T,
                                dense_output=True)
            )

            self.coef_ = new_coef
            self.intercept_ = new_intercept

        # de-normalize them further to take the y normalization into account
        if y_scaler is not None:
            new_coef = self.coef_ * y_scaler.scale_
            new_intercept = y_scaler.inverse_transform(
                np.atleast_1d(self.intercept_)
            )
            if np.isscalar(self.intercept_):
                new_intercept = new_intercept[0]
            self.coef_ = new_coef
            self.intercept_ = new_intercept

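# A minimal sketch (an illustration, not part of the original module) of how
# the mix-in is meant to be combined with a concrete sklearn linear model;
# `Ridge` is just an example, and any estimator exposing `coef_` and
# `intercept_` in the usual sklearn layout should work:
#
#     >>> from sklearn.linear_model import Ridge
#     >>> class DeNormalizableRidge(Ridge, DeNormalizableLinearModelMixIn):
#     ...     pass
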

class DeNormalizableLinearRegression(LinearRegression,
                                     DeNormalizableLinearModelMixIn):
    """
    A de-normalizable linear regression. The old normalized coefficients are
    kept and exposed through the `feature_importances_` property.
    """
    @property
    def feature_importances_(self):
        if hasattr(self, '_feature_importances_'):
            return self._feature_importances_
        else:
            return self.coef_

    def denormalize(self,
                    x_scaler: StandardScaler = None,
                    y_scaler: StandardScaler = None):
        """
        De-normalizes the model, and saves a copy of the old normalized
        coefficients in `self._feature_importances_`.

        :param x_scaler: the StandardScaler that was used to scale x before
            fitting, if any
        :param y_scaler: the StandardScaler that was used to scale y before
            fitting, if any
        :return:
        """
        self._feature_importances_ = self.coef_
        super().denormalize(x_scaler, y_scaler)


# This should work for any other sklearn linear model as well, e.g.:
# class DeNormalizableLasso(Lasso, DeNormalizableLinearModelMixIn):
#     pass
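
# A quick consistency sketch (an illustration, not part of the original
# module): a model fit on standardized inputs should, once de-normalized,
# predict the same values from the raw inputs as it previously did from the
# scaled ones. Only x_scaler is exercised here; the data is made up:
#
#     >>> import numpy as np
#     >>> from sklearn.preprocessing import StandardScaler
#     >>> rng = np.random.RandomState(0)
#     >>> X = rng.rand(20, 3)
#     >>> y = X @ np.array([1., -2., 3.]) + 0.5
#     >>> x_scaler = StandardScaler()
#     >>> Xs = x_scaler.fit_transform(X)
#     >>> reg = DeNormalizableLinearRegression().fit(Xs, y)
#     >>> preds_before = reg.predict(Xs)
#     >>> reg.denormalize(x_scaler=x_scaler)
#     >>> np.allclose(reg.predict(X), preds_before)
#     True
#     >>> reg.feature_importances_  # still the normalized coefficients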