From 2d4756a5f46f877bdefc07b4bd09a9841ba0570d Mon Sep 17 00:00:00 2001
From: Ludwig Schneider
Date: Thu, 7 Nov 2024 11:05:01 -0600
Subject: [PATCH 1/2] inter

---
 python/pytest/test_tests.py | 61 +++++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 23 deletions(-)

diff --git a/python/pytest/test_tests.py b/python/pytest/test_tests.py
index 61e6702..f058c72 100644
--- a/python/pytest/test_tests.py
+++ b/python/pytest/test_tests.py
@@ -9,7 +9,6 @@ def test_bug1(device):
     nnodes = 15
     graph = ptens.ggraph.random(nnodes, 0.5)
-    print(graph)
 
     subgraphs = [ptens.subgraph.trivial(), ptens.subgraph.edge()]
 
     node_values = torch.rand(nnodes, 1, requires_grad=True)
@@ -19,41 +18,57 @@ def test_bug1(device):
     gather_features = ptens.subgraphlayer0.gather(sg, node_attributes)
     result = torch.sum(gather_features)
     result.backward()
-    print(node_values.grad)
 
     # linmap_features = ptens.subgraphlayer0.linmaps(node_attributes)
     result = torch.sum(node_attributes)
     result.backward()
-    print(node_attributes.grad)
 
     check = gradcheck(ptens.subgraphlayer0.gather, (sg, node_attributes), eps=1e-3)
-    print(check)
+    assert check
 
 
 class TestGather(object):
 
-    def backprop(self,cls,fn,N,_nc):
-        if(cls==ptens.ptensor0):
-            x=cls.randn(N,_nc)
-        else:
-            atoms=ptens_base.atomspack.random(N,0.3)
-            x=cls.randn(atoms,_nc)
+
+    def backprop(self,cls, N,nc, device):
+        atoms=ptens_base.atomspack.random(N, nc, 0.3)
+        x=cls.randn(atoms,nc).to(device)
         x.requires_grad_()
         G=ptens.ggraph.random(N,0.3)
-        z=fn(x,G)
-
-        testvec=z.randn_like()
-        loss=z.inp(testvec).to('cuda')
-        loss.backward(torch.tensor(1.0))
-        xgrad=x.get_grad()
+        atoms2 = G.subgraphs(ptens.subgraph.trivial())
+
+        check = gradcheck(cls.gather, (atoms2, x), eps=1e-3)
+        assert check
+
+        z = cls.gather(atoms2, x)
+        loss=torch.sum(z)
+        loss.backward()
+        xgrad=x.grad
+        print("xgrad", xgrad)
+
-        xeps=x.randn_like()
-        z=fn(x+xeps,G)
-        xloss=z.inp(testvec).to('cuda')
-        assert(torch.allclose(xloss-loss,xeps.inp(xgrad),rtol=1e-3, atol=1e-4))
+        h=1e-6
+        xgrad2 = torch.zero_like(xgrad)
+        for i in range(xgrad2.size()):
+            xp = copy.deepcopy(x)
+            xm = copy.deepcopy(x)
+            xp[i] += h
+            xm[i] -= h
+
+            grad[i] = cls(gather
+        z_plus = cls.gather(atoms2, x+h)
+        z_minus = cls.gather(atoms2, x-h)
+        xgrad2 = (z_plus - z_minus)/(2*h)
+        print("xgrad2", xgrad2)
+
+
+
+    @pytest.mark.parametrize(('N', 'nc'), [(8, 1), (1, 2), (16, 4)])
+    def test_gather0(self,N, nc, device):
+        self.backprop(ptens.ptensorlayer0,N,nc, device)
 
-    @pytest.mark.parametrize('nc', [1, 2, 4])
-    def test_gather(self,nc):
-        self.backprop(ptens.ptensor0,ptens.gather,8,nc)
+    @pytest.mark.parametrize(('N', 'nc'), [(8, 1), (1, 2), (16, 4)])
+    def test_gather1(self,N, nc, device):
+        self.backprop(ptens.ptensorlayer0,N,nc, device)

From 0b596f931595f0fb2f4ed07889b13de45fce39c5 Mon Sep 17 00:00:00 2001
From: Ludwig Schneider
Date: Fri, 8 Nov 2024 09:50:51 -0600
Subject: [PATCH 2/2] add numerical grad tests

---
 python/pytest/conftest.py        | 84 ++++++++++++++++++++++++++++++--
 python/pytest/test_tests.py      | 28 ++++-------
 python/src/ptens/ptensorlayer.py |  5 +-
 3 files changed, 93 insertions(+), 24 deletions(-)

diff --git a/python/pytest/conftest.py b/python/pytest/conftest.py
index b25600a..a75308e 100644
--- a/python/pytest/conftest.py
+++ b/python/pytest/conftest.py
@@ -1,5 +1,5 @@
 import os
-
+import torch
 
 import pytest
 
@@ -23,8 +23,6 @@ def device(ptens_cuda_support):
 
     if "cuda" in device:
         assert ptens_cuda_support
-        import torch
-
         assert torch.cuda.is_available()
 
     return device
@@ -33,3 +31,83 @@
 @pytest.fixture(scope="session")
 def float_epsilon():
     return 1e-5
+
+
+def numerical_grad_sum(fn, x, h):
+    grad = torch.zeros_like(x)
+    for i in range(x.numel()):
+        xp = x.clone()
+        xp.view(-1)[i] += h
+        xm = x.clone()
+        xm.view(-1)[i] -= h
+
+        # Using torch.sum here because torch autograd calculates the partial derivatives of a scalar-valued function.
+        # With sum we get a scalar-valued function, and the summed parts factorize.
+        num_diff = torch.sum(fn(xp)) - torch.sum(fn(xm))
+        grad_value = num_diff / (2 * float(h))
+        grad.view(-1)[i] = grad_value
+    return grad
+
+
+@pytest.mark.parametrize("m,c", [(0., 3.), (0.5, -0.3), (-0.8, 0.2)])
+def test_numerical_grad_linear(m, c):
+    def linear(x):
+        return m*x + c
+
+    x = torch.randn((5,10))
+    grad = numerical_grad_sum(linear, x, 1e-2)
+    ana_grad = torch.ones_like(x) * m
+
+    allclose = torch.allclose(ana_grad, grad, rtol=1e-3, atol=1e-5)
+    if not allclose:
+        print(f"Max absolute difference: {torch.max(torch.abs(ana_grad - grad))}")
+        print(f"Mean absolute difference: {torch.mean(torch.abs(ana_grad - grad))}")
+        print(f"Numerical grad range: [{grad.min()}, {grad.max()}]")
+        print(f"Analytical grad range: [{ana_grad.min()}, {ana_grad.max()}]")
+
+    assert allclose
+
+
+@pytest.mark.parametrize("a,b,c", [(1. ,2., 3.), (-0.5, 0.4, -0.3), (1.2, -0.8, 0.2)])
+def test_numerical_grad_square(a, b, c):
+    from torch.autograd.gradcheck import gradcheck
+    def square(x):
+        return a*x**2 + b*x + c
+
+    x = torch.randn((5,10))
+    grad = numerical_grad_sum(square, x, 1e-3)
+    ana_grad = 2*a*x + b
+
+    allclose = torch.allclose(ana_grad, grad, rtol=1e-2, atol=1e-2)
+
+    if not allclose:
+        print(f"Max absolute difference: {torch.max(torch.abs(ana_grad - grad))}")
+        print(f"Mean absolute difference: {torch.mean(torch.abs(ana_grad - grad))}")
+        print(f"Numerical grad range: [{grad.min()}, {grad.max()}]")
+        print(f"Analytical grad range: [{ana_grad.min()}, {ana_grad.max()}]")
+
+    assert allclose
+    x.requires_grad_()
+    assert gradcheck(square, (x,), eps=1e-2, rtol=1e-2, atol=1e-2)
+
+
+# Add a test against autograd for validation
+def test_against_autograd():
+    def complex_function(x):
+        return torch.sum(torch.sin(x) + x**2)
+
+    x = torch.randn(5, 10, requires_grad=True)
+
+    # Compute gradient using autograd
+    y = complex_function(x)
+    y.backward()
+    autograd_grad = x.grad
+
+    # Compute gradient using numerical method
+    numerical_grad = numerical_grad_sum(complex_function, x.detach(), 1e-3)
+
+    allclose = torch.allclose(autograd_grad, numerical_grad, rtol=1e-2, atol=1e-2)
+    if not allclose:
+        print(f"Max absolute difference: {torch.max(torch.abs(autograd_grad - numerical_grad))}")
+        print(f"Mean absolute difference: {torch.mean(torch.abs(autograd_grad - numerical_grad))}")
+
+    assert allclose
diff --git a/python/pytest/test_tests.py b/python/pytest/test_tests.py
index f058c72..27143f7 100644
--- a/python/pytest/test_tests.py
+++ b/python/pytest/test_tests.py
@@ -2,6 +2,7 @@
 import ptens
 import pytest
 import ptens_base
+from conftest import numerical_grad_sum
 from torch.autograd.gradcheck import gradcheck
 
@@ -29,7 +30,7 @@ def test_bug1(device):
 
 
 class TestGather(object):
-
+    h=1e-3
 
     def backprop(self,cls, N,nc, device):
         atoms=ptens_base.atomspack.random(N, nc, 0.3)
@@ -38,31 +39,20 @@ def backprop(self,cls, N,nc, device):
         G=ptens.ggraph.random(N,0.3)
         atoms2 = G.subgraphs(ptens.subgraph.trivial())
 
-        check = gradcheck(cls.gather, (atoms2, x), eps=1e-3)
+        check = gradcheck(cls.gather, (atoms2, x), eps=self.h)
         assert check
 
         z = cls.gather(atoms2, x)
         loss=torch.sum(z)
         loss.backward()
         xgrad=x.grad
-        print("xgrad", xgrad)
-
-
-        h=1e-6
-        xgrad2 = torch.zero_like(xgrad)
-        for i in range(xgrad2.size()):
-            xp = copy.deepcopy(x)
-            xm = copy.deepcopy(x)
-            xp[i] += h
-            xm[i] -= h
-
-            grad[i] = cls(gather
-        z_plus = cls.gather(atoms2, x+h)
-        z_minus = cls.gather(atoms2, x-h)
-        xgrad2 = (z_plus - z_minus)/(2*h)
-        print("xgrad2", xgrad2)
+
+        fn = lambda x: cls.gather(atoms2, x)
+        xgrad2 = numerical_grad_sum(fn, x, self.h)
+        assert torch.allclose(xgrad, xgrad2, rtol=1e-2, atol=1e-2)
+
 
 
     @pytest.mark.parametrize(('N', 'nc'), [(8, 1), (1, 2), (16, 4)])
diff --git a/python/src/ptens/ptensorlayer.py b/python/src/ptens/ptensorlayer.py
index 80a36a9..9d89967 100644
--- a/python/src/ptens/ptensorlayer.py
+++ b/python/src/ptens/ptensorlayer.py
@@ -19,7 +19,7 @@ class ptensorlayer(torch.Tensor):
 
 
-    covariant_functions=[torch.Tensor.to,torch.Tensor.add,torch.Tensor.sub,torch.relu,torch.nn.functional.linear]
+    covariant_functions=[torch.Tensor.to,torch.Tensor.add,torch.Tensor.sub,torch.relu,torch.nn.functional.linear, torch.Tensor.clone]
 
     @classmethod
     def __torch_function__(cls, func, types, args=(), kwargs=None):
@@ -27,7 +27,8 @@ def __torch_function__(cls, func, types, args=(), kwargs=None):
             kwargs = {}
         if func in ptensorlayer.covariant_functions:
             r= super().__torch_function__(func, types, args, kwargs)
-            r.atoms=args[0].atoms
+            if hasattr(args[0], "atoms"):
+                r.atoms=args[0].atoms
         else:
             r= super().__torch_function__(func, types, args, kwargs)
             if isinstance(r,torch.Tensor):
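
Note (reviewer sketch, not part of either patch): the series hinges on the central-difference
check implemented by numerical_grad_sum in python/pytest/conftest.py. The following is a
minimal, self-contained illustration of that pattern validated against autograd on a plain
torch function, so it runs without ptens installed. The test name and the torch.tanh
stand-in for `lambda x: cls.gather(atoms2, x)` are illustrative assumptions, not code from
this series.

    import torch


    def numerical_grad_sum(fn, x, h):
        # Central difference of sum(fn(x)) with respect to each entry of x,
        # mirroring the helper this series adds to python/pytest/conftest.py.
        grad = torch.zeros_like(x)
        for i in range(x.numel()):
            xp = x.clone()
            xp.view(-1)[i] += h
            xm = x.clone()
            xm.view(-1)[i] -= h
            grad.view(-1)[i] = (torch.sum(fn(xp)) - torch.sum(fn(xm))) / (2 * h)
        return grad


    def test_numerical_vs_autograd_tanh():
        fn = torch.tanh  # hypothetical stand-in for `lambda x: cls.gather(atoms2, x)`
        x = torch.randn(4, 3, requires_grad=True)
        torch.sum(fn(x)).backward()                 # analytical gradient via autograd
        num = numerical_grad_sum(fn, x.detach(), 1e-3)
        assert torch.allclose(x.grad, num, rtol=1e-2, atol=1e-2)

Summing the output before differencing is what makes the comparison well defined: backward()
on the summed loss and the summed central difference both compute d(sum fn(x))/dx, so the
analytical and numerical gradients can be compared entry by entry, exactly as backprop() in
test_tests.py does after the second patch.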