coreylowman · coreylowman · Oct 7, 2022 · Oct 4, 2022 · Oct 6, 2022 · Oct 6, 2022
diff --git a/src/devices/mod.rs b/src/devices/mod.rs
@@ -20,6 +20,10 @@ pub use select::*;
 mod conv;
 #[cfg(feature = "nightly")]
 pub use conv::*;
+#[cfg(feature = "nightly")]
+mod pool2d;
+#[cfg(feature = "nightly")]
+pub use pool2d::*;
 
 use std::ops::*;
 

diff --git a/src/devices/pool2d.rs b/src/devices/pool2d.rs
@@ -0,0 +1,222 @@
+use super::Cpu;
+
+pub struct PoolMax; // `Kind` marker: each window reduces to its maximum
+pub struct PoolMin; // `Kind` marker: each window reduces to its minimum
+pub struct PoolAvg; // `Kind` marker: each window reduces to its sum divided by `K * K`
+
+/// **Requires nightly** 2d pooling with kernel size, stride, and padding specified at trait level.
+///
+/// `Kind` selects the reduction; the image dimensions `C`, `H`, `W` are inferred from the inputs.
+pub trait DevicePool2D<const K: usize, const S: usize, const P: usize, Kind> {
+    /// Forward operation that writes the pooled value of each `K`x`K` window of `inp` into `out`.
+    fn pool_forward<const C: usize, const H: usize, const W: usize>(
+        inp: &[[[f32; W]; H]; C],
+        out: &mut [[[f32; (W + 2 * P - K) / S + 1]; (H + 2 * P - K) / S + 1]; C],
+    );
+
+    /// Backward operation that adds the gradient arriving in `out_g` into `inp_g` (gradient of `inp`).
+    fn pool_backward<const C: usize, const H: usize, const W: usize>(
+        inp: &[[[f32; W]; H]; C],
+        out_g: &[[[f32; (W + 2 * P - K) / S + 1]; (H + 2 * P - K) / S + 1]; C],
+        inp_g: &mut [[[f32; W]; H]; C],
+    );
+}
+
+impl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolMax> for Cpu {
+    fn pool_forward<const C: usize, const H: usize, const W: usize>(
+        inp: &[[[f32; W]; H]; C],
+        out: &mut [[[f32; (W + 2 * P - K) / S + 1]; (H + 2 * P - K) / S + 1]; C],
+    ) {
+        let out_height = (H + 2 * P - K) / S + 1;
+        let out_width = (W + 2 * P - K) / S + 1;
+        for c in 0..C {
+            for oh in 0..out_height {
+                for ow in 0..out_width {
+                    let o = &mut out[c][oh][ow];
+                    let mut tmp = f32::NEG_INFINITY; // stays -inf if every tap is padding
+                    for k1 in 0..K {
+                        let y = (oh * S + k1).checked_sub(P); // None => row in top padding
+                        for k2 in 0..K {
+                            let x = (ow * S + k2).checked_sub(P); // None => column in left padding
+                            if let Some((y, x)) = y.zip(x) {
+                                if y < H && x < W { // padding taps are skipped, never read as 0.0
+                                    tmp = tmp.max(inp[c][y][x]);
+                                }
+                            }
+                        }
+                    }
+                    *o = tmp;
+                }
+            }
+        }
+    }
+    /// Adds each `out_g` value to every in-bounds input equal to its window's max (ties included).
+    fn pool_backward<const C: usize, const H: usize, const W: usize>(
+        inp: &[[[f32; W]; H]; C],
+        out_g: &[[[f32; (W + 2 * P - K) / S + 1]; (H + 2 * P - K) / S + 1]; C],
+        inp_g: &mut [[[f32; W]; H]; C],
+    ) {
+        let out_height = (H + 2 * P - K) / S + 1;
+        let out_width = (W + 2 * P - K) / S + 1;
+        for c in 0..C {
+            for oh in 0..out_height {
+                for ow in 0..out_width {
+                    let o_g = &out_g[c][oh][ow];
+                    let mut tmp = f32::NEG_INFINITY; // first pass recomputes the window max
+                    for k1 in 0..K {
+                        let y = (oh * S + k1).checked_sub(P);
+                        for k2 in 0..K {
+                            let x = (ow * S + k2).checked_sub(P);
+                            if let Some((y, x)) = y.zip(x) {
+                                if y < H && x < W {
+                                    tmp = tmp.max(inp[c][y][x]);
+                                }
+                            }
+                        }
+                    }
+                    // Second pass: every in-bounds tap whose input equals the max receives `o_g`.
+                    for k1 in 0..K {
+                        let y = (oh * S + k1).checked_sub(P);
+                        for k2 in 0..K {
+                            let x = (ow * S + k2).checked_sub(P);
+                            if let Some((y, x)) = y.zip(x) {
+                                if y < H && x < W && inp[c][y][x] == tmp {
+                                    inp_g[c][y][x] += o_g;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+impl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolMin> for Cpu {
+    fn pool_forward<const C: usize, const H: usize, const W: usize>(
+        inp: &[[[f32; W]; H]; C],
+        out: &mut [[[f32; (W + 2 * P - K) / S + 1]; (H + 2 * P - K) / S + 1]; C],
+    ) {
+        let out_height = (H + 2 * P - K) / S + 1;
+        let out_width = (W + 2 * P - K) / S + 1;
+        for c in 0..C {
+            for oh in 0..out_height {
+                for ow in 0..out_width {
+                    let o = &mut out[c][oh][ow];
+                    let mut tmp = f32::INFINITY; // stays +inf if every tap is padding
+                    for k1 in 0..K {
+                        let y = (oh * S + k1).checked_sub(P); // None => row in top padding
+                        for k2 in 0..K {
+                            let x = (ow * S + k2).checked_sub(P); // None => column in left padding
+                            if let Some((y, x)) = y.zip(x) {
+                                if y < H && x < W { // padding taps are skipped, never read as 0.0
+                                    tmp = tmp.min(inp[c][y][x]);
+                                }
+                            }
+                        }
+                    }
+                    *o = tmp;
+                }
+            }
+        }
+    }
+    /// Adds each `out_g` value to every in-bounds input equal to its window's min (ties included).
+    fn pool_backward<const C: usize, const H: usize, const W: usize>(
+        inp: &[[[f32; W]; H]; C],
+        out_g: &[[[f32; (W + 2 * P - K) / S + 1]; (H + 2 * P - K) / S + 1]; C],
+        inp_g: &mut [[[f32; W]; H]; C],
+    ) {
+        let out_height = (H + 2 * P - K) / S + 1;
+        let out_width = (W + 2 * P - K) / S + 1;
+        for c in 0..C {
+            for oh in 0..out_height {
+                for ow in 0..out_width {
+                    let o_g = &out_g[c][oh][ow];
+                    let mut tmp = f32::INFINITY; // first pass recomputes the window min
+                    for k1 in 0..K {
+                        let y = (oh * S + k1).checked_sub(P);
+                        for k2 in 0..K {
+                            let x = (ow * S + k2).checked_sub(P);
+                            if let Some((y, x)) = y.zip(x) {
+                                if y < H && x < W {
+                                    tmp = tmp.min(inp[c][y][x]);
+                                }
+                            }
+                        }
+                    }
+                    // Second pass: every in-bounds tap whose input equals the min receives `o_g`.
+                    for k1 in 0..K {
+                        let y = (oh * S + k1).checked_sub(P);
+                        for k2 in 0..K {
+                            let x = (ow * S + k2).checked_sub(P);
+                            if let Some((y, x)) = y.zip(x) {
+                                if y < H && x < W && inp[c][y][x] == tmp {
+                                    inp_g[c][y][x] += o_g;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+impl<const K: usize, const S: usize, const P: usize> DevicePool2D<K, S, P, PoolAvg> for Cpu {
+    fn pool_forward<const C: usize, const H: usize, const W: usize>(
+        inp: &[[[f32; W]; H]; C],
+        out: &mut [[[f32; (W + 2 * P - K) / S + 1]; (H + 2 * P - K) / S + 1]; C],
+    ) {
+        let out_height = (H + 2 * P - K) / S + 1;
+        let out_width = (W + 2 * P - K) / S + 1;
+        let inv_k2 = 1.0 / (K * K) as f32; // fixed K*K divisor: skipped padding taps act as 0.0
+        for c in 0..C {
+            for oh in 0..out_height {
+                for ow in 0..out_width {
+                    let o = &mut out[c][oh][ow];
+                    let mut tmp = 0.0; // running sum over the in-bounds taps of this window
+                    for k1 in 0..K {
+                        let y = (oh * S + k1).checked_sub(P); // None => row in top padding
+                        for k2 in 0..K {
+                            let x = (ow * S + k2).checked_sub(P); // None => column in left padding
+                            if let Some((y, x)) = y.zip(x) {
+                                if y < H && x < W {
+                                    tmp += inp[c][y][x];
+                                }
+                            }
+                        }
+                    }
+                    *o = tmp * inv_k2;
+                }
+            }
+        }
+    }
+    /// Adds `out_g / (K * K)` to every in-bounds input of each window; `_inp` is not read.
+    fn pool_backward<const C: usize, const H: usize, const W: usize>(
+        _inp: &[[[f32; W]; H]; C],
+        out_g: &[[[f32; (W + 2 * P - K) / S + 1]; (H + 2 * P - K) / S + 1]; C],
+        inp_g: &mut [[[f32; W]; H]; C],
+    ) {
+        let out_height = (H + 2 * P - K) / S + 1;
+        let out_width = (W + 2 * P - K) / S + 1;
+        let inv_k2 = 1.0 / (K * K) as f32;
+        for c in 0..C {
+            for oh in 0..out_height {
+                for ow in 0..out_width {
+                    let g = out_g[c][oh][ow] * inv_k2; // equal share for each of the K*K taps
+                    for k1 in 0..K {
+                        let y = (oh * S + k1).wrapping_sub(P); // underflow wraps, failing `y < H`
+                        if y < H {
+                            for k2 in 0..K {
+                                let x = (ow * S + k2).wrapping_sub(P); // same trick for columns
+                                if x < W {
+                                    inp_g[c][y][x] += g;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/src/nn/mod.rs b/src/nn/mod.rs
@@ -122,6 +122,21 @@ mod conv;
 #[cfg(feature = "nightly")]
 pub use conv::*;
 
+#[cfg(feature = "nightly")]
+mod pool2d_max;
+#[cfg(feature = "nightly")]
+pub use pool2d_max::*;
+
+#[cfg(feature = "nightly")]
+mod pool2d_min;
+#[cfg(feature = "nightly")]
+pub use pool2d_min::*;
+
+#[cfg(feature = "nightly")]
+mod pool2d_avg;
+#[cfg(feature = "nightly")]
+pub use pool2d_avg::*;
+
 #[cfg(test)]
 mod tests {
     use crate::gradients::{GradientProvider, Gradients};

diff --git a/src/nn/pool2d_avg.rs b/src/nn/pool2d_avg.rs
@@ -0,0 +1,108 @@
+use super::{LoadFromNpz, SaveToNpz};
+use super::{Module, ResetParams};
+use crate::gradients::*;
+use crate::tensor::*;
+use rand::Rng;
+
+/// Average pool with 2d kernel that operates on images (3d) and batches of images (4d).
+/// Each patch reduces to the average of the values in the patch.
+///
+/// Generics:
+/// - `KERNEL_SIZE`: The size of the kernel applied to both width and height of the images.
+/// - `STRIDE`: How far to move the kernel each step. Defaults to `1`.
+/// - `PADDING`: How much zero padding to add around the images. Defaults to `0`.
+#[derive(Debug, Default, Clone)]
+pub struct AvgPool2D<const KERNEL_SIZE: usize, const STRIDE: usize = 1, const PADDING: usize = 0>;
+
+impl<const K: usize, const S: usize, const P: usize> CanUpdateWithGradients for AvgPool2D<K, S, P> {
+    fn update<G: GradientProvider>(&mut self, _: &mut G, _: &mut UnusedTensors) {} // no-op
+}
+// The unit struct has no fields, so resetting parameters is a no-op as well.
+impl<const K: usize, const S: usize, const P: usize> ResetParams for AvgPool2D<K, S, P> {
+    fn reset_params<R: Rng>(&mut self, _: &mut R) {}
+}
+// Empty impls: saving/loading use only whatever default methods the npz traits provide.
+impl<const K: usize, const S: usize, const P: usize> SaveToNpz for AvgPool2D<K, S, P> {}
+impl<const K: usize, const S: usize, const P: usize> LoadFromNpz for AvgPool2D<K, S, P> {}
+
+impl<
+        const K: usize,
+        const S: usize,
+        const P: usize,
+        const C: usize,
+        const H: usize,
+        const W: usize,
+        T: Tape,
+    > Module<Tensor3D<C, H, W, T>> for AvgPool2D<K, S, P>
+where
+    [(); (W + 2 * P - K) / S + 1]:, // output-width expression must be well-formed
+    [(); (H + 2 * P - K) / S + 1]:, // output-height expression must be well-formed
+{
+    type Output = Tensor3D<C, { (H + 2 * P - K) / S + 1 }, { (W + 2 * P - K) / S + 1 }, T>;
+    /// Pools a single image by delegating to `avg2d` with this layer's `K`, `S`, and `P`.
+    fn forward(&self, x: Tensor3D<C, H, W, T>) -> Self::Output {
+        x.avg2d::<K, S, P>()
+    }
+}
+
+impl<
+        const K: usize,
+        const S: usize,
+        const P: usize,
+        const B: usize,
+        const C: usize,
+        const H: usize,
+        const W: usize,
+        T: Tape,
+    > Module<Tensor4D<B, C, H, W, T>> for AvgPool2D<K, S, P>
+where
+    [(); (W + 2 * P - K) / S + 1]:, // output-width expression must be well-formed
+    [(); (H + 2 * P - K) / S + 1]:, // output-height expression must be well-formed
+{
+    type Output = Tensor4D<B, C, { (H + 2 * P - K) / S + 1 }, { (W + 2 * P - K) / S + 1 }, T>;
+    /// Pools a batch of images by delegating to `avg2d` with this layer's `K`, `S`, and `P`.
+    fn forward(&self, x: Tensor4D<B, C, H, W, T>) -> Self::Output {
+        x.avg2d::<K, S, P>()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    // Shape-only checks: the expected extent (10 + 2 * P - K) / S + 1 is enforced by the types.
+    #[test]
+    fn test_forward_3d_sizes() {
+        type Img = Tensor3D<3, 10, 10>;
+        let _: Tensor3D<3, 8, 8> = AvgPool2D::<3>::default().forward(Img::zeros());
+        let _: Tensor3D<3, 9, 9> = AvgPool2D::<2>::default().forward(Img::zeros());
+        let _: Tensor3D<3, 7, 7> = AvgPool2D::<4>::default().forward(Img::zeros());
+        let _: Tensor3D<3, 4, 4> = AvgPool2D::<3, 2>::default().forward(Img::zeros());
+        let _: Tensor3D<3, 3, 3> = AvgPool2D::<3, 3>::default().forward(Img::zeros());
+        let _: Tensor3D<3, 10, 10> = AvgPool2D::<3, 1, 1>::default().forward(Img::zeros());
+        let _: Tensor3D<3, 12, 12> = AvgPool2D::<3, 1, 2>::default().forward(Img::zeros());
+        let _: Tensor3D<3, 6, 6> = AvgPool2D::<3, 2, 2>::default().forward(Img::zeros());
+    }
+    // Same kernel/stride/padding cases with a leading batch dimension of 5.
+    #[test]
+    fn test_forward_4d_sizes() {
+        type Img = Tensor4D<5, 3, 10, 10>;
+        let _: Tensor4D<5, 3, 7, 7> = AvgPool2D::<4>::default().forward(Img::zeros());
+        let _: Tensor4D<5, 3, 8, 8> = AvgPool2D::<3>::default().forward(Img::zeros());
+        let _: Tensor4D<5, 3, 9, 9> = AvgPool2D::<2>::default().forward(Img::zeros());
+        let _: Tensor4D<5, 3, 4, 4> = AvgPool2D::<3, 2>::default().forward(Img::zeros());
+        let _: Tensor4D<5, 3, 3, 3> = AvgPool2D::<3, 3>::default().forward(Img::zeros());
+        let _: Tensor4D<5, 3, 10, 10> = AvgPool2D::<3, 1, 1>::default().forward(Img::zeros());
+        let _: Tensor4D<5, 3, 12, 12> = AvgPool2D::<3, 1, 2>::default().forward(Img::zeros());
+        let _: Tensor4D<5, 3, 6, 6> = AvgPool2D::<3, 2, 2>::default().forward(Img::zeros());
+    }
+    // Layers in a tuple: 10 -> 8 -> 6 through (A, A); B (K=1, P=1) then yields 6 + 2 = 8.
+    #[test]
+    fn test_tuple_pool_sizes() {
+        type A = AvgPool2D<3>;
+        type B = AvgPool2D<1, 1, 1>;
+        type Img = Tensor3D<1, 10, 10>;
+
+        let _: Tensor3D<1, 6, 6> = <(A, A)>::default().forward(Img::zeros());
+        let _: Tensor3D<1, 8, 8> = <(A, A, B)>::default().forward(Img::zeros());
+    }
+}