Coverage for src/onorm/minmax.py: 100%
41 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 20:22 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-07 20:22 +0000
1import base64
2import json
3from typing import Any, Dict
5import numpy as np
7from .normalization_base import Normalizer
class MinMaxScaler(Normalizer):
    r"""
    Online min-max scaler for feature normalization to the [0, 1] range.

    Tracks the running minimum and maximum for each feature and scales values
    relative to these statistics. The statistics are updated incrementally as
    new observations arrive.

    For each feature $i$ at time $t$, tracks:

    $$\begin{aligned}\text{min}_i &= \min\{x_{1,i}, \ldots, x_{t,i}\}\\
    \text{max}_i &= \max\{x_{1,i}, \ldots, x_{t,i}\}\end{aligned}$$

    And transforms values as:

    $$x_{\text{norm},i} = \frac{x_i - \text{min}_i}{\text{max}_i - \text{min}_i}$$

    Parameters
    ----------
    n_dim : int
        Number of dimensions/features to normalize.

    Attributes
    ----------
    min : np.ndarray
        Running minimum for each feature, shape (n_dim,).
    max : np.ndarray
        Running maximum for each feature, shape (n_dim,).

    Examples
    --------
    ```{python}
    from onorm import MinMaxScaler
    import numpy as np
    scaler = MinMaxScaler(n_dim=3)
    X = np.random.uniform(-5, 5, size=(100, 3))
    for x in X:
        scaler.partial_fit(x)
    x_new = np.array([2.0, -1.0, 3.0])
    x_normalized = scaler.transform(x_new.copy())
    assert np.all((x_normalized >= 0) & (x_normalized <= 1))
    ```

    Notes
    -----
    - If a feature has constant values (min == max), its denominator is
      replaced by 1 to avoid division by zero, so a value equal to that
      constant is transformed to 0.
    - Values outside the range observed so far are mapped outside [0, 1].
    - This scaler is sensitive to outliers since min/max can be heavily
      influenced by extreme values.
    """

    def __init__(self, n_dim: int) -> None:
        self.n_dim = n_dim
        self.reset()

    def _update_min(self, x: np.ndarray) -> None:
        """Update running minimum for each feature."""
        # np.fmin ignores NaN entries, so a NaN observation leaves the
        # running minimum of that feature untouched.
        self.min = np.fmin(self.min, x)

    def _update_max(self, x: np.ndarray) -> None:
        """Update running maximum for each feature."""
        # np.fmax ignores NaN entries, mirroring _update_min.
        self.max = np.fmax(self.max, x)

    def partial_fit(self, x: np.ndarray) -> None:
        """
        Update the minimum and maximum for each feature.

        Parameters
        ----------
        x : np.ndarray
            A 1-D array of shape (n_dim,) representing a new observation.
        """
        self._update_min(x)
        self._update_max(x)

    def transform(self, x: np.ndarray) -> np.ndarray:
        """
        Scale features using the current min/max statistics.

        Parameters
        ----------
        x : np.ndarray
            A 1-D array of shape (n_dim,) to normalize.

        Returns
        -------
        np.ndarray
            Normalized array of shape (n_dim,). Values lie in [0, 1] for
            inputs inside the range observed so far; inputs outside that
            range are mapped outside [0, 1].

        Notes
        -----
        If min == max for a feature (constant feature), the denominator for
        that feature is replaced by 1 to avoid division by zero, so a value
        equal to the constant is transformed to 0. The check is applied to
        each feature independently.
        """
        span = self.max - self.min
        # Guard every feature on its own: a single constant feature must not
        # produce NaN/inf just because other features have a non-zero range.
        degenerate = np.abs(span) <= np.finfo(np.float64).eps
        safe_span = np.where(degenerate, 1.0, span)
        return (x - self.min) / safe_span

    def reset(self) -> None:
        """
        Reset the scaler to initial state.

        Reinitializes min to positive infinity and max to negative infinity
        so that the first observation will set both values.
        """
        self.min = np.full(self.n_dim, np.inf, dtype=np.float64)
        self.max = np.full(self.n_dim, -np.inf, dtype=np.float64)

    def to_dict(self) -> Dict[str, Any]:
        """
        Serialize the scaler state to a dictionary.

        Returns a dictionary with JSON-serializable metadata and base64-encoded
        numpy arrays for efficient storage and database compatibility.

        Returns
        -------
        dict
            Dictionary with keys:
            - 'version': str, serialization format version
            - 'class': str, class name
            - 'config': dict, configuration parameters
            - 'state': dict, serialized state arrays (base64-encoded)

        Examples
        --------
        ```{python}
        from onorm import MinMaxScaler
        import numpy as np

        scaler = MinMaxScaler(n_dim=3)
        X = np.random.uniform(-5, 5, size=(100, 3))
        for x in X:
            scaler.partial_fit(x)

        # Serialize
        data = scaler.to_dict()

        # Could save to database
        # db.execute("INSERT INTO models (config, state) VALUES (%s, %s)",
        #            (json.dumps(data['config']), data['state']))
        ```
        """
        # from_dict always decodes the buffers as float64, so coerce here to
        # keep the round trip exact whatever dtype the arrays currently have.
        min_bytes = np.asarray(self.min, dtype=np.float64).tobytes()
        max_bytes = np.asarray(self.max, dtype=np.float64).tobytes()
        return {
            "version": "1.0",
            "class": "MinMaxScaler",
            "config": {"n_dim": self.n_dim},
            "state": {
                "min": base64.b64encode(min_bytes).decode("ascii"),
                "max": base64.b64encode(max_bytes).decode("ascii"),
            },
        }

    @staticmethod
    def _decode_state_array(encoded: str, n_dim: int, name: str) -> np.ndarray:
        """Decode a base64 float64 buffer and check it holds n_dim values."""
        # np.frombuffer returns a read-only view over the decoded bytes; copy
        # so the restored attribute is an ordinary writable array.
        values = np.frombuffer(base64.b64decode(encoded), dtype=np.float64).copy()
        if values.size != n_dim:
            raise ValueError(
                f"State array '{name}' has {values.size} elements, expected {n_dim}"
            )
        return values

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "MinMaxScaler":
        """
        Deserialize a scaler from a dictionary.

        Parameters
        ----------
        data : dict
            Dictionary created by to_dict() containing:
            - 'version': serialization format version
            - 'class': class name (must be 'MinMaxScaler')
            - 'config': configuration parameters
            - 'state': serialized state arrays

        Returns
        -------
        MinMaxScaler
            Deserialized scaler instance with restored state.

        Raises
        ------
        ValueError
            If the class name doesn't match, or a state array does not
            decode to exactly ``n_dim`` float64 values.
        KeyError
            If 'config', 'state' or one of their required entries is missing.

        Examples
        --------
        ```{python}
        from onorm import MinMaxScaler

        # Deserialize from saved data
        data = {
            "version": "1.0",
            "class": "MinMaxScaler",
            "config": {"n_dim": 3},
            "state": {"min": "...", "max": "..."}
        }
        scaler = MinMaxScaler.from_dict(data)
        ```
        """
        if data.get("class") != "MinMaxScaler":
            raise ValueError(f"Cannot deserialize {data.get('class')} as MinMaxScaler")

        # Create instance with config
        config = data["config"]
        n_dim = config["n_dim"]
        instance = cls(n_dim=n_dim)

        # Restore state arrays
        state = data["state"]
        instance.min = cls._decode_state_array(state["min"], n_dim, "min")
        instance.max = cls._decode_state_array(state["max"], n_dim, "max")

        return instance

    def to_json(self) -> str:
        """
        Serialize the scaler to a JSON string.

        Returns
        -------
        str
            JSON string representation of the scaler state.

        Examples
        --------
        ```{python}
        from onorm import MinMaxScaler

        scaler = MinMaxScaler(n_dim=3)
        # ... train scaler ...
        json_str = scaler.to_json()
        ```
        """
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "MinMaxScaler":
        """
        Deserialize a scaler from a JSON string.

        Parameters
        ----------
        json_str : str
            JSON string created by to_json().

        Returns
        -------
        MinMaxScaler
            Deserialized scaler instance.

        Examples
        --------
        ```{python}
        from onorm import MinMaxScaler

        # Deserialize from JSON string
        scaler = MinMaxScaler.from_json(json_str)
        ```
        """
        return cls.from_dict(json.loads(json_str))