# src/m5py/linreg_utils.py

from abc import abstractmethod, ABCMeta

import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.linear_model._base import LinearModel
from sklearn.preprocessing import StandardScaler
from sklearn.utils.extmath import safe_sparse_dot


def linreg_model_to_text(model, feature_names=None, target_name=None,
                         precision=3, line_breaks=False):
    """
    Converts a linear regression model to a text representation.

    :param model: a fitted linear model exposing `coef_` and `intercept_`,
        such as `sklearn.linear_model.LinearRegression`
    :param feature_names: an optional sequence of feature names, used instead
        of the default "X[i]" in the text
    :param target_name: an optional name for the target; if provided, the
        expression is prefixed with "<target_name> = "
    :param precision: the number of digits used to format each coefficient
    :param line_breaks: if True, each term in the sum shows in a different line
    :return: a string representation of the linear model
    """

    bits = []

    # Template for numbers: we want scientific notation with a given precision
    nb_tpl = "%%0.%se" % precision

    # Handle multi-dimensional y (its second dim must be size 1, though)
    if len(model.coef_.shape) > 1:
        assert model.coef_.shape[0] == 1
        assert len(model.coef_.shape) == 2
        coefs = np.ravel(model.coef_)  # convert to 1D
        assert len(model.intercept_) == 1
        intercept = model.intercept_.item()  # extract scalar
    else:
        coefs = model.coef_  # a 1D array
        intercept = model.intercept_  # a scalar already

    # First, all the coef * driver terms
    for i, c in enumerate(coefs):
        var_name = ("X[%s]" % i) if feature_names is None else feature_names[i]

        if i == 0:
            # first term: no sign needed
            if np.abs(c) < 1:
                # use scientific notation
                product_text = (nb_tpl + " * %s") % (c, var_name)
            else:
                # use standard notation with the given precision
                c = np.round(c, precision)
                product_text = "%s * %s" % (c, var_name)
        else:
            # all the other terms: the sign should appear
            lb = '\n' if line_breaks else ""
            coef_abs = np.abs(c)
            coef_sign = '+' if np.sign(c) > 0 else '-'
            if coef_abs < 1:
                # use scientific notation
                product_text = (("%s%s " + nb_tpl + " * %s")
                                % (lb, coef_sign, coef_abs, var_name))
            else:
                # use standard notation with the given precision
                coef_abs = np.round(coef_abs, precision)
                product_text = ("%s%s %s * %s"
                                % (lb, coef_sign, coef_abs, var_name))

        bits.append(product_text)

    # Finally, the intercept
    if len(bits) == 0:
        # the intercept is the only term in the sum
        if np.abs(intercept) < 1:
            # use scientific notation only for small numbers (otherwise 12
            # would read 1.2e1 ... not friendly)
            constant_text = nb_tpl % intercept
        else:
            # use standard notation with the given precision
            i = np.round(intercept, precision)
            constant_text = "%s" % i
    else:
        # there are other terms in the sum: the sign should appear
        lb = '\n' if line_breaks else ""
        coef_abs = np.abs(intercept)
        coef_sign = '+' if np.sign(intercept) > 0 else '-'
        if coef_abs < 1:
            # use scientific notation
            constant_text = ("%s%s " + nb_tpl) % (lb, coef_sign, coef_abs)
        else:
            # use standard notation with the given precision
            coef_abs = np.round(coef_abs, precision)
            constant_text = "%s%s %s" % (lb, coef_sign, coef_abs)

    bits.append(constant_text)

    txt = " ".join(bits)
    if target_name is not None:
        txt = target_name + " = " + txt

    return txt
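
# A minimal usage sketch for `linreg_model_to_text` (an illustration, not part
# of the original module). The data and names below are made up, and the exact
# rendering of each coefficient depends on the fitted values and `precision`:
#
#     >>> import numpy as np
#     >>> from sklearn.linear_model import LinearRegression
#     >>> X = np.array([[0., 1.], [1., 2.], [2., 4.]])
#     >>> y = 3 * X[:, 0] + 0.5 * X[:, 1] + 10
#     >>> reg = LinearRegression().fit(X, y)
#     >>> linreg_model_to_text(reg, feature_names=["a", "b"], target_name="y")
#     'y = 3.0 * a + 5.000e-01 * b + 10.0'
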

class DeNormalizableMixIn(metaclass=ABCMeta):
    """
    An abstract base class to be implemented by models that are able to
    de-normalize themselves.
    """
    __slots__ = ()

    @abstractmethod
    def denormalize(self,
                    x_scaler: StandardScaler = None,
                    y_scaler: StandardScaler = None
                    ):
        """
        De-normalizes the model, knowing that it was fit with the given
        x_scaler and y_scaler.
        """


class DeNormalizableLinearModelMixIn(DeNormalizableMixIn, LinearModel):
    """
    A mix-in class to add a 'denormalization' capability to a linear model.
    """
    def denormalize(self,
                    x_scaler: StandardScaler = None,
                    y_scaler: StandardScaler = None
                    ):
        """
        De-normalizes the linear regression model.

        Before this function is executed,

            (y - y_mean)/y_scale = self.coef_.T <dot> (x - x_mean)/x_scale
                                   + self.intercept_

        so

            (y - y_mean)/y_scale = (self.coef_/x_scale).T <dot> x
                + (self.intercept_ - (self.coef_/x_scale).T <dot> x_mean)

        that is,

            (y - y_mean)/y_scale = new_coef.T <dot> x + new_intercept

        where

         * new_coef = self.coef_ / x_scale
         * new_intercept = self.intercept_ - new_coef.T <dot> x_mean

        Then, going back to y:

            y = (new_coef * y_scale).T <dot> x
                + (new_intercept * y_scale + y_mean)

        :param x_scaler: the StandardScaler that was used to scale x before
            fitting, if any
        :param y_scaler: the StandardScaler that was used to scale y before
            fitting, if any
        :return:
        """

        # First, save the normalized coefficients
        self.normalized_coef_ = self.coef_
        self.normalized_intercept_ = self.intercept_

        # de-normalize the coefficients to take the x normalization into
        # account
        if x_scaler is not None:
            new_coef = self.coef_ / x_scaler.scale_
            new_intercept = (
                self.intercept_ -
                safe_sparse_dot(x_scaler.mean_, new_coef.T,
                                dense_output=True)
            )

            self.coef_ = new_coef
            self.intercept_ = new_intercept

        # de-normalize them further to take the y normalization into account
        if y_scaler is not None:
            new_coef = self.coef_ * y_scaler.scale_
            new_intercept = y_scaler.inverse_transform(
                np.atleast_1d(self.intercept_)
            )
            if np.isscalar(self.intercept_):
                new_intercept = new_intercept[0]
            self.coef_ = new_coef
            self.intercept_ = new_intercept

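# A minimal sketch (an illustration, not part of the original module) of how
# the mix-in is meant to be combined with a concrete sklearn linear model;
# `Ridge` is just an example, and any estimator exposing `coef_` and
# `intercept_` in the usual sklearn layout should work:
#
#     >>> from sklearn.linear_model import Ridge
#     >>> class DeNormalizableRidge(Ridge, DeNormalizableLinearModelMixIn):
#     ...     pass
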

class DeNormalizableLinearRegression(LinearRegression,
                                     DeNormalizableLinearModelMixIn):
    """
    A de-normalizable linear regression. The old normalized coefficients are
    kept and exposed through the `feature_importances_` property.
    """
    @property
    def feature_importances_(self):
        if hasattr(self, '_feature_importances_'):
            return self._feature_importances_
        else:
            return self.coef_

    def denormalize(self,
                    x_scaler: StandardScaler = None,
                    y_scaler: StandardScaler = None):
        """
        De-normalizes the model, and saves a copy of the old normalized
        coefficients in `self._feature_importances_`.

        :param x_scaler: the StandardScaler that was used to scale x before
            fitting, if any
        :param y_scaler: the StandardScaler that was used to scale y before
            fitting, if any
        :return:
        """
        self._feature_importances_ = self.coef_
        super().denormalize(x_scaler, y_scaler)


# This should work for any other sklearn linear model as well, e.g.:
# class DeNormalizableLasso(Lasso, DeNormalizableLinearModelMixIn):
#     pass
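
# A quick consistency sketch (an illustration, not part of the original
# module): a model fit on standardized inputs should, once de-normalized,
# predict the same values from the raw inputs as it previously did from the
# scaled ones. Only x_scaler is exercised here; the data is made up:
#
#     >>> import numpy as np
#     >>> from sklearn.preprocessing import StandardScaler
#     >>> rng = np.random.RandomState(0)
#     >>> X = rng.rand(20, 3)
#     >>> y = X @ np.array([1., -2., 3.]) + 0.5
#     >>> x_scaler = StandardScaler()
#     >>> Xs = x_scaler.fit_transform(X)
#     >>> reg = DeNormalizableLinearRegression().fit(Xs, y)
#     >>> preds_before = reg.predict(Xs)
#     >>> reg.denormalize(x_scaler=x_scaler)
#     >>> np.allclose(reg.predict(X), preds_before)
#     True
#     >>> reg.feature_importances_  # still the normalized coefficients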