Coverage for src/onorm/normalization_base.py: 100%
20 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 20:22 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 20:22 +0000
1from abc import ABCMeta, abstractmethod
2from typing import Any, Dict
4import numpy as np
class Normalizer(metaclass=ABCMeta):
    """
    Common interface shared by every online normalizer in the onorm package.

    Normalizers learn incrementally: each new observation updates the
    normalization parameters, and no historical data is retained.

    Concrete subclasses must override every abstract method declared here:
    ``partial_fit``, ``transform``, ``reset``, ``to_dict`` and ``from_dict``.
    ``partial_fit_transform`` is provided ready-made on top of the first two.

    Examples
    --------
    ```{python}
    from onorm import MinMaxScaler
    import numpy as np
    scaler = MinMaxScaler(n_dim=3)
    for x in np.random.normal(size=(100, 3)):
        scaler.partial_fit(x)
    x_new = np.array([1.0, 2.0, 3.0])
    x_normalized = scaler.transform(x_new)
    ```
    """

    def __init__(self, **kwargs: object) -> None:
        """Accept (and ignore) arbitrary keyword arguments; no state is set up here."""

    @abstractmethod
    def partial_fit(self, x: np.ndarray) -> None:
        """
        Fold one new observation into the normalization model.

        Implementations update their internal statistics from ``x`` without
        keeping ``x`` itself, so memory usage stays bounded during online
        learning.

        Parameters
        ----------
        x : np.ndarray
            A 1-D array holding the new observation. Its length should equal
            the n_dim value the normalizer was initialized with.

        Notes
        -----
        Only the normalizer's state changes; the input is not transformed.
        Call `transform` or `partial_fit_transform` to obtain normalized data.
        """
        raise NotImplementedError()

    @abstractmethod
    def transform(self, x: np.ndarray) -> np.ndarray:
        """
        Normalize an observation with the model as it currently stands.

        The transformation relies on the statistics accumulated by earlier
        `partial_fit` calls.

        Parameters
        ----------
        x : np.ndarray
            A 1-D array holding the observation to normalize. Its length
            should equal the n_dim value the normalizer was initialized with.

        Returns
        -------
        np.ndarray
            The normalized observation, a 1-D array shaped like the input.

        Notes
        -----
        For efficiency the input array is modified in-place. Pass a copy
        when the original values must survive: `transform(x.copy())`.
        """
        raise NotImplementedError()

    def partial_fit_transform(self, x: np.ndarray) -> np.ndarray:
        """
        Learn from an observation and return its normalized form.

        Convenience wrapper that calls `partial_fit(x)` and then
        `transform(x)`.

        Parameters
        ----------
        x : np.ndarray
            A 1-D array holding the new observation to fit and normalize.

        Returns
        -------
        np.ndarray
            The normalized observation, a 1-D array shaped like the input.

        Notes
        -----
        The statistics used for normalization already include the new
        observation. Consequently the very first observation is typically
        not well-normalized, because the model has seen almost no data.
        """
        # Order matters: the model is updated first, then applied.
        self.partial_fit(x)
        normalized = self.transform(x)
        return normalized

    @abstractmethod
    def reset(self) -> None:
        """
        Discard everything learned and return to the initial state.

        After this call the normalizer behaves as if it had never seen an
        observation and is ready for a new dataset.

        Notes
        -----
        Handy for reusing a single normalizer object on an unrelated dataset
        instead of constructing a new instance.
        """
        raise NotImplementedError()

    @abstractmethod
    def to_dict(self) -> Dict[str, Any]:
        """
        Export the normalizer's configuration and learned state as a dictionary.

        The result is JSON-serializable, so it can be kept in a database,
        written to a file, or sent over a network.

        Returns
        -------
        dict
            Dictionary with keys:
            - "version": str - Serialization format version
            - "class": str - Normalizer class name
            - "config": dict - Configuration parameters
            - "state": dict - Learned statistics (arrays are base64-encoded)

        See Also
        --------
        from_dict : Deserialize from dictionary
        to_json : Serialize to JSON string
        from_json : Deserialize from JSON string

        Examples
        --------
        ```{python}
        from onorm import StandardScaler
        import numpy as np
        scaler = StandardScaler(n_dim=3)
        for x in np.random.randn(100, 3):
            scaler.partial_fit(x)
        data = scaler.to_dict()
        restored = StandardScaler.from_dict(data)
        ```
        """
        raise NotImplementedError()

    @classmethod
    @abstractmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Normalizer":
        """
        Rebuild a normalizer from a dictionary produced by to_dict().

        The new instance carries exactly the configuration and learned state
        of the normalizer that was serialized.

        Parameters
        ----------
        data : dict
            Serialized state, as returned by to_dict().

        Returns
        -------
        Normalizer
            A normalizer instance with its state restored.

        Raises
        ------
        ValueError
            If the dictionary describes a different normalizer class.

        See Also
        --------
        to_dict : Serialize to dictionary
        to_json : Serialize to JSON string
        from_json : Deserialize from JSON string
        """
        raise NotImplementedError()