Coverage for src/onorm/normalization_base.py: 100%

20 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-07 20:22 +0000

1from abc import ABCMeta, abstractmethod 

2from typing import Any, Dict 

3 

4import numpy as np 

5 

6 

class Normalizer(metaclass=ABCMeta):
    """
    Abstract base class for online data normalizers.

    This class defines the standard API for all normalizers in the onorm package.
    All normalizers support incremental (online) learning, where the normalization
    parameters are updated as new data arrives without storing historical data.

    Concrete normalizer implementations must implement every abstract member:
    `partial_fit`, `transform`, `reset`, `to_dict`, and `from_dict`. A subclass
    that leaves any of them out cannot be instantiated. `partial_fit_transform`
    is provided by this base class in terms of `partial_fit` and `transform`.

    Examples
    --------
    ```{python}
    from onorm import MinMaxScaler
    import numpy as np
    scaler = MinMaxScaler(n_dim=3)
    for x in np.random.normal(size=(100, 3)):
        scaler.partial_fit(x)
    x_new = np.array([1.0, 2.0, 3.0])
    x_normalized = scaler.transform(x_new)
    ```
    """

    def __init__(self, **kwargs: object) -> None:
        """
        Initialize the normalizer.

        The base class keeps no state of its own. Keyword arguments are
        accepted and ignored here, so subclasses can forward their own
        arguments to `super().__init__(**kwargs)` without filtering them.
        """

    @abstractmethod
    def partial_fit(self, x: np.ndarray) -> None:
        """
        Incrementally update the normalization model with a new observation.

        This method updates the internal state of the normalizer based on a new
        observation without storing the observation itself. This enables online
        learning with bounded memory usage.

        Parameters
        ----------
        x : np.ndarray
            A 1-D array representing a new observation. The length should match
            the n_dim parameter used during initialization.

        Notes
        -----
        This method modifies the normalizer's internal state but does not
        transform the input data. Use `transform` or `partial_fit_transform`
        to normalize data.
        """
        raise NotImplementedError

    @abstractmethod
    def transform(self, x: np.ndarray) -> np.ndarray:
        """
        Transform data using the current normalization model.

        This method applies the normalization transformation based on the
        statistics learned from previous observations via `partial_fit`.

        Parameters
        ----------
        x : np.ndarray
            A 1-D array representing an observation to normalize. The length
            should match the n_dim parameter used during initialization.

        Returns
        -------
        np.ndarray
            The normalized observation as a 1-D array with the same shape as input.

        Notes
        -----
        This method modifies the input array in-place for efficiency. If you need
        to preserve the original array, pass a copy: `transform(x.copy())`.
        """
        raise NotImplementedError

    def partial_fit_transform(self, x: np.ndarray) -> np.ndarray:
        """
        Update the normalization model and transform the data in one step.

        This is a convenience method equivalent to calling `partial_fit(x)`
        followed by `transform(x)`. It updates the model with the new observation
        and returns the normalized version.

        Parameters
        ----------
        x : np.ndarray
            A 1-D array representing a new observation to fit and normalize.

        Returns
        -------
        np.ndarray
            The normalized observation as a 1-D array with the same shape as input.

        Notes
        -----
        The normalization is based on the statistics AFTER incorporating the
        new observation. This means the first observation will typically not
        be well-normalized since the model has minimal data.

        The same array object is handed to `transform`, so the in-place
        behaviour documented there applies here as well. Pass `x.copy()` if
        the caller's array must be preserved.
        """
        # Order matters: fit first so the transform sees the updated statistics.
        self.partial_fit(x)
        return self.transform(x)

    @abstractmethod
    def reset(self) -> None:
        """
        Reset the normalizer to its initial state.

        This method clears all learned statistics and returns the normalizer
        to its initial state, as if no observations have been seen. After
        calling reset(), the normalizer can be used on a new dataset.

        Notes
        -----
        This is useful when you want to reuse the same normalizer object on
        a completely different dataset without creating a new instance.
        """
        raise NotImplementedError

    @abstractmethod
    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize the normalizer state to a dictionary.

        Returns a dictionary containing the normalizer's configuration and
        learned state in a JSON-serializable format. The dictionary can be
        stored in databases, files, or transmitted over networks.

        Returns
        -------
        dict
            Dictionary with keys:
            - "version": str - Serialization format version
            - "class": str - Normalizer class name
            - "config": dict - Configuration parameters
            - "state": dict - Learned statistics (arrays are base64-encoded)

        See Also
        --------
        from_dict : Deserialize from dictionary
        to_json, from_json : JSON string counterparts, where the concrete
            class provides them (they are not defined on this base class).

        Examples
        --------
        ```{python}
        from onorm import StandardScaler
        import numpy as np
        scaler = StandardScaler(n_dim=3)
        for x in np.random.randn(100, 3):
            scaler.partial_fit(x)
        data = scaler.to_dict()
        restored = StandardScaler.from_dict(data)
        ```
        """
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Normalizer":
        """
        Deserialize a normalizer from a dictionary.

        Creates a new normalizer instance from a dictionary created by to_dict().
        The normalizer will have the exact same configuration and learned state
        as the original.

        Parameters
        ----------
        data : dict
            Dictionary created by to_dict() containing serialized state.

        Returns
        -------
        Normalizer
            Deserialized normalizer instance with restored state.

        Raises
        ------
        ValueError
            If the data dictionary is for a different normalizer class.

        See Also
        --------
        to_dict : Serialize to dictionary
        to_json, from_json : JSON string counterparts, where the concrete
            class provides them (they are not defined on this base class).
        """
        raise NotImplementedError