From db59bce47c418cafc1a384345f284fafe86d58bb Mon Sep 17 00:00:00 2001
From: David Hagen <david@drhagen.com>
Date: Fri, 14 Jun 2024 07:05:36 -0400
Subject: [PATCH] Remove HashOutput (#68)

Feature can be resurrected when bandwidth exists to make it actually work
---
 src/tensora/codegen/_type_to_c.py             |   7 +-
 src/tensora/ir/types.py                       |  10 -
 .../iteration_graph/outputs/__init__.py       |   1 -
 .../iteration_graph/outputs/_append.py        |  10 +-
 src/tensora/iteration_graph/outputs/_hash.py  | 326 ------------------
 .../iteration_graph/tensora_hash_table.c      | 167 ---------
 tests/codegen/test_ast_to_c.py                |   1 -
 tests/csr_matmul_hash.c                       |  69 ----
 8 files changed, 5 insertions(+), 586 deletions(-)
 delete mode 100644 src/tensora/iteration_graph/outputs/_hash.py
 delete mode 100644 src/tensora/iteration_graph/tensora_hash_table.c
 delete mode 100644 tests/csr_matmul_hash.c

diff --git a/src/tensora/codegen/_type_to_c.py b/src/tensora/codegen/_type_to_c.py
index 5380d24..46329c9 100644
--- a/src/tensora/codegen/_type_to_c.py
+++ b/src/tensora/codegen/_type_to_c.py
@@ -2,7 +2,7 @@
 
 from functools import singledispatch
 
-from ..ir.types import Array, FixedArray, Float, HashTable, Integer, Mode, Pointer, Tensor, Type
+from ..ir.types import Array, FixedArray, Float, Integer, Mode, Pointer, Tensor, Type
 
 
 def space_variable(variable: str | None = None) -> str:
@@ -37,11 +37,6 @@ def type_to_c_mode(self: Mode, variable: str | None = None) -> str:
     return "taco_mode_t" + space_variable(variable)
 
 
-@type_to_c.register(HashTable)
-def type_to_c_hash_table(self: HashTable, variable: str | None = None) -> str:
-    return "hash_table_t" + space_variable(variable)
-
-
 @type_to_c.register(Pointer)
 def type_to_c_pointer(self: Pointer, variable: str | None = None) -> str:
     return f"{type_to_c(self.target)}* restrict" + space_variable(variable)
diff --git a/src/tensora/ir/types.py b/src/tensora/ir/types.py
index 55fdbf1..83594ca 100644
--- a/src/tensora/ir/types.py
+++ b/src/tensora/ir/types.py
@@ -8,8 +8,6 @@
     "tensor",
     "Mode",
     "mode",
-    "HashTable",
-    "hash_table",
     "Pointer",
     "Array",
     "FixedArray",
@@ -54,14 +52,6 @@ class Mode(Type):
 mode = Mode()
 
 
-@dataclass(frozen=True, slots=True)
-class HashTable(Type):
-    pass
-
-
-hash_table = HashTable()
-
-
 @dataclass(frozen=True, slots=True)
 class Pointer(Type):
     target: Type
diff --git a/src/tensora/iteration_graph/outputs/__init__.py b/src/tensora/iteration_graph/outputs/__init__.py
index d23c83d..10dc4c7 100644
--- a/src/tensora/iteration_graph/outputs/__init__.py
+++ b/src/tensora/iteration_graph/outputs/__init__.py
@@ -1,4 +1,3 @@
 from ._append import AppendOutput
 from ._base import Output
 from ._bucket import BucketOutput
-from ._hash import HashOutput
diff --git a/src/tensora/iteration_graph/outputs/_append.py b/src/tensora/iteration_graph/outputs/_append.py
index 13a3048..f1886cc 100644
--- a/src/tensora/iteration_graph/outputs/_append.py
+++ b/src/tensora/iteration_graph/outputs/_append.py
@@ -21,7 +21,6 @@
 from ..identifiable_expression import ast as ie_ast
 from ._base import Output
 from ._bucket import BucketOutput
-from ._hash import HashOutput
 
 default_array_size = Multiply(IntegerLiteral(1024), IntegerLiteral(1024))
 
@@ -151,9 +150,8 @@ def next_output(
                 )
                 return next_output, next_output.write_declarations(bucket), SourceBuilder()
             else:
-                next_output = HashOutput(self.output, self.next_layer)
-                return (
-                    next_output,
-                    next_output.write_declarations(),
-                    next_output.write_cleanup(kernel_type),
+                raise NotImplementedError(
+                    "Encountered a sparse output layer preceded by a contraction layer or a later "
+                    "output layer. This requires a hash table to store intermediate outputs, "
+                    "which is not currently implemented."
                 )
diff --git a/src/tensora/iteration_graph/outputs/_hash.py b/src/tensora/iteration_graph/outputs/_hash.py
deleted file mode 100644
index 9ff0a91..0000000
--- a/src/tensora/iteration_graph/outputs/_hash.py
+++ /dev/null
@@ -1,326 +0,0 @@
-__all__ = ["HashOutput"]
-
-from dataclasses import dataclass, replace
-
-from ...format import Mode
-from ...ir import SourceBuilder, types
-from ...ir.ast import (
-    Address,
-    ArrayAllocate,
-    ArrayLiteral,
-    Break,
-    Expression,
-    Free,
-    FunctionCall,
-    IntegerLiteral,
-    LessThan,
-    ModeLiteral,
-    NotEqual,
-    Variable,
-)
-from ...kernel_type import KernelType
-from .._names import dimension_name, layer_pointer, previous_layer_pointer, vals_name
-from .._write_sparse_ir import write_crd_assembly, write_pos_assembly
-from ..identifiable_expression import TensorLayer
-from ..identifiable_expression import ast as ie_ast
-from ._base import Output
-from ._bucket import BucketOutput
-
-
-@dataclass(frozen=True, slots=True)
-class HashOutput(Output):
-    output: ie_ast.Tensor
-    starting_layer: int
-    unfulfilled: set[int]
-
-    def __init__(
-        self, output: ie_ast.Tensor, starting_layer: int, unfulfilled: set[int] | None = None
-    ):
-        object.__setattr__(self, "output", output)
-        object.__setattr__(self, "starting_layer", starting_layer)
-        if unfulfilled is not None:
-            object.__setattr__(self, "unfulfilled", unfulfilled)
-        else:
-            object.__setattr__(
-                self, "unfulfilled", set(range(starting_layer, self.final_dense_index()))
-            )
-
-    def final_dense_index(self):
-        final_dense_index = self.output.order
-        for i in reversed(range(self.starting_layer, self.output.order)):
-            if self.output.modes[i] == Mode.compressed:
-                break
-            else:
-                final_dense_index = i
-
-        return final_dense_index
-
-    def key_number(self, layer: int):
-        number = 0
-        for i in range(self.starting_layer, self.output.order):
-            if layer == i:
-                return number
-            if self.output.modes[i] == Mode.compressed:
-                number += 1
-        return number
-
-    def write_declarations(self) -> SourceBuilder:
-        source = SourceBuilder("Hash table initialization")
-
-        modes = [ModeLiteral(mode) for mode in self.output.modes[self.starting_layer :]]
-        dims = [
-            dimension_name(variable) for variable in self.output.indexes[self.starting_layer :]
-        ]
-
-        source.add_dependency("hash")
-
-        # Construct hash table
-        source.append(self.name().declare(types.hash_table))
-        source.append(
-            self.modes_name().declare(types.Array(types.mode)).assign(ArrayLiteral(modes))
-        )
-        source.append(
-            self.dims_name().declare(types.Array(types.integer)).assign(ArrayLiteral(dims))
-        )
-        source.append(
-            FunctionCall(
-                Variable("hash_construct"),
-                [
-                    Address(self.name()),
-                    IntegerLiteral(len(modes)),
-                    IntegerLiteral(self.final_dense_index() - self.starting_layer),
-                    self.modes_name(),
-                    self.dims_name(),
-                ],
-            )
-        )
-
-        return source
-
-    def write_assignment(self, right_hand_side: str, kernel_type: KernelType) -> SourceBuilder:
-        raise RuntimeError()
-
-    def write_cleanup(self, kernel_type: KernelType) -> SourceBuilder:
-        source = SourceBuilder("Hash table cleanup")
-
-        order_name = self.order_name()
-        loop_name = self.sort_index_name()
-
-        # Argsort the elements by key
-        source.append(
-            self.order_name()
-            .declare(types.Pointer(types.integer))
-            .assign(ArrayAllocate(types.integer, self.name().attr("count")))
-        )
-        source.append(loop_name.declare(types.integer).assign(0))
-        with source.loop(LessThan(loop_name, self.name().attr("count"))):
-            source.append(self.order_name().idx(loop_name).assign(loop_name))
-            source.append(loop_name.increment())
-        source.append(
-            FunctionCall(
-                Variable("qsort_r"),
-                [
-                    self.order_name(),
-                    self.name().attr("count"),
-                    Variable("sizeof(uint32_t)"),  # Temporary hack
-                    Variable("hash_comparator"),
-                    Address(self.name()),
-                ],
-            )
-        )
-
-        # Extract indexes recursively
-        source.append(self.extract_index_name().declare(types.integer).assign(0))
-        source.append(self.write_layer_cleanup(self.starting_layer, kernel_type))
-
-        # Free temporaries
-        source.append(Free(order_name))
-
-        # Free hash table
-        source.append(FunctionCall(Variable("hash_destruct"), [Address(self.name())]))
-
-        return source
-
-    def write_layer_cleanup(self, layer: int, kernel_type: KernelType):
-        source = SourceBuilder()
-
-        if layer < self.final_dense_index():
-            key_number = self.key_number(layer)
-            layer_index = Variable(self.output.indexes[layer])
-            dimension_size = dimension_name(self.output.indexes[layer])
-            position = layer_pointer(self.output.id, layer)
-            previous_position = previous_layer_pointer(self.output.id, layer)
-            end_position = self.end_position(key_number)
-            next_end_position = self.end_position(key_number + 1)
-
-            # Reusable search code
-            # This is not applicable for the final key, which has no next key
-            search_source = SourceBuilder()
-            search_source.append(
-                next_end_position.declare(types.integer).assign(self.extract_index_name())
-            )
-            with search_source.loop(LessThan(next_end_position, end_position)):
-                with search_source.branch(
-                    NotEqual(
-                        self.name()
-                        .attr("keys")
-                        .idx(self.order_name().idx(next_end_position))
-                        .idx(key_number),
-                        layer_index,
-                    )
-                ):
-                    search_source.append(Break())
-                search_source.append(next_end_position.increment())
-
-            # Keys phase
-            if self.output.modes[layer] == Mode.dense:
-                source.append(layer_index.declare(types.integer).assign(0))
-                with source.loop(LessThan(layer_index, dimension_size)):
-                    source.append(
-                        position.declare(types.integer).assign(previous_position.plus(layer_index))
-                    )
-                    source.append(search_source)
-                    source.append(self.write_layer_cleanup(layer + 1, kernel_type))
-                    source.append(layer_index.increment())
-
-                    if layer == self.final_dense_index() - 1:
-                        source.append(self.extract_index_name().increment())
-
-            elif self.output.modes[layer] == Mode.compressed:
-                with source.loop(LessThan(self.extract_index_name(), end_position)):
-                    source.append(
-                        layer_index.declare(types.integer).assign(
-                            self.name()
-                            .attr("keys")
-                            .idx(self.order_name().idx(self.extract_index_name()))
-                            .idx(key_number)
-                        )
-                    )
-                    source.append(search_source)
-                    source.append(self.write_layer_cleanup(layer + 1, kernel_type))
-
-                    if kernel_type.is_assemble():
-                        source.append(write_crd_assembly(TensorLayer(self.output, layer)))
-                    source.append(position.increment())
-
-                    if layer == self.final_dense_index() - 1:
-                        source.append(self.extract_index_name().increment())
-
-            if kernel_type.is_assemble():
-                source.append(write_pos_assembly(TensorLayer(self.output, layer)))
-        elif layer < self.output.order:
-            # Bucket phase
-            layer_index = Variable(self.output.indexes[layer])
-            dimension_size = dimension_name(self.output.indexes[layer])
-            position = layer_pointer(self.output.id, layer)
-            previous_position = previous_layer_pointer(self.output.id, layer)
-            bucket_position = self.bucket_position(layer)
-            previous_bucket_position = self.previous_bucket_position(layer)
-
-            source.append(layer_index.declare(types.integer).assign(0))
-            with source.loop(LessThan(layer_index, dimension_size)):
-                source.append(
-                    position.declare(types.integer).assign(previous_position.plus(layer_index))
-                )
-                source.append(
-                    bucket_position.declare(types.integer).assign(
-                        previous_bucket_position.plus(layer_index)
-                    )
-                )
-                source.append(self.write_layer_cleanup(layer + 1, kernel_type))
-                source.append(layer_index.increment())
-        elif layer == self.output.order:
-            # Final phase
-            vals = vals_name(self.output.name)
-            previous_position = previous_layer_pointer(self.output.id, layer)
-            previous_bucket_position = self.previous_bucket_position(layer)
-            bucket = BucketOutput(
-                self.output, list(range(self.final_dense_index(), self.output.order))
-            )
-            source.append(
-                vals.idx(previous_position).assign(bucket.name().idx(previous_bucket_position))
-            )
-
-        return source
-
-    def next_output(
-        self, iteration_output: TensorLayer | None, kernel_type: KernelType
-    ) -> tuple[Output, SourceBuilder, SourceBuilder]:
-        if iteration_output is None:
-            return self, SourceBuilder(), SourceBuilder()
-        else:
-            next_unfulfilled = self.unfulfilled - {iteration_output.layer}
-            if len(next_unfulfilled) == 0:
-                final_dense_index = self.final_dense_index()
-
-                next_output = BucketOutput(
-                    self.output, list(range(final_dense_index, self.output.order))
-                )
-
-                # Write declaration of bucket
-                source = SourceBuilder()
-
-                key_names = [
-                    Variable(self.output.indexes[layer])
-                    for layer in range(self.starting_layer, final_dense_index)
-                ]
-                key_name = self.key_name()
-
-                source.append(
-                    key_name.declare(types.Array(types.integer)).assign(ArrayLiteral(key_names))
-                )
-                source.append(
-                    next_output.write_declarations(
-                        FunctionCall(
-                            Variable("hash_get_bucket"),
-                            [
-                                Address(self.name()),
-                                key_name,
-                            ],
-                        )
-                    )
-                )
-
-                return next_output, source, SourceBuilder()
-            else:
-                return (
-                    replace(self, unfulfilled=next_unfulfilled),
-                    SourceBuilder(),
-                    SourceBuilder(),
-                )
-
-    def name(self) -> Variable:
-        return Variable("hash_table")
-
-    def modes_name(self) -> Variable:
-        return Variable(f"i_{self.name().name}_modes")
-
-    def dims_name(self) -> Variable:
-        return Variable(f"i_{self.name().name}_dims")
-
-    def key_name(self) -> Variable:
-        return Variable(f"i_{self.name().name}_key")
-
-    def order_name(self) -> Variable:
-        return Variable(f"{self.name().name}_order")
-
-    def sort_index_name(self) -> Variable:
-        return Variable(f"i_{self.name().name}_argsort")
-
-    def extract_index_name(self) -> Variable:
-        return Variable(f"p_{self.name().name}_order")
-
-    def end_position(self, key_number: int) -> Expression:
-        if key_number == 0:
-            return self.name().attr("count")
-        else:
-            return Variable(f"p_{self.name().name}_order_{key_number}_end")
-
-    def bucket_position(self, layer: int):
-        return Variable(layer_pointer(self.output.id, layer).name + "_bucket")
-
-    def previous_bucket_position(self, layer: int):
-        if layer == 0:
-            return IntegerLiteral(0)
-        else:
-            return self.bucket_position(layer - 1)
diff --git a/src/tensora/iteration_graph/tensora_hash_table.c b/src/tensora/iteration_graph/tensora_hash_table.c
deleted file mode 100644
index 8776330..0000000
--- a/src/tensora/iteration_graph/tensora_hash_table.c
+++ /dev/null
@@ -1,167 +0,0 @@
-#include <stdio.h>
-
-static inline uint32_t murmur_32_scramble(uint32_t k) {
-    k *= 0xcc9e2d51;
-    k = (k << 15) | (k >> 17);
-    k *= 0x1b873593;
-    return k;
-}
-
-uint32_t murmur3_32(uint32_t n_sparse, const uint32_t* key) {
-    uint32_t h = 1;
-
-    for (size_t i = 0; i < n_sparse; i++) {
-        h ^= murmur_32_scramble(key[i]);
-        h = (h << 13) | (h >> 19);
-        h = h * 5 + 0xe6546b64;
-    }
-
-    h ^= n_sparse;
-    h ^= h >> 16;
-    h *= 0x85ebca6b;
-    h ^= h >> 13;
-    h *= 0xc2b2ae35;
-    h ^= h >> 16;
-    return h;
-}
-
-struct hash_table_t {
-    uint32_t n_layers;
-    uint32_t dense_start;
-    taco_mode_t *modes;
-    uint32_t n_sparse;
-    uint32_t bucket_size;
-
-    uint32_t count;
-    uint32_t table_capacity;
-    uint32_t entries_capacity;
-    int32_t[] indexes;
-    uint32_t[] keys;
-    double[] values;
-};
-
-void hash_construct(
-    hash_table_t *hash_table,
-    uint32_t n_layers,
-    uint32_t dense_start,
-    taco_mode_t *modes,
-    uint32_t *dimensions
-) {
-    uint32_t n_sparse = dense_start - n_layers
-
-    uint32_t bucket_size = 1;
-    for (uint32_t i = dense_start; i < n_layers; i++) {
-        bucket_size *= dimensions[i];
-    }
-
-    uint32_t table_capacity = 10;  // 1 MB
-    uint32_t entries_capacity = 1024*1024;
-    uint32_t[] indexes = malloc(sizeof(uint32_t) * (1 << table_capacity));
-    for (uint32_t i = 0; i < (1 << table_capacity); i++) {
-        indexes[i] = -1;
-    }
-
-    hash_table->n_layers = n_layers;
-    hash_table->dense_start = dense_start;
-    hash_table->modes = modes;
-    hash_table->n_sparse = n_sparse;
-    hash_table->bucket_size = bucket_size;
-
-    hash_table->count = 0;
-    hash_table->table_capacity = table_capacity;
-    hash_table->entries_capacity = entries_capacity;
-    hash_table->indexes = indexes;
-    hash_table->keys = malloc(sizeof(uint32_t) * n_sparse * entries_capacity);
-    hash_table->values = malloc(sizeof(double) * bucket_size * entries_capacity);
-}
-
-void hash_realloc(hash_table_t *hash_table, uint32_t index) {
-    // Heuristic to expand hash table when it is two thirds full
-    if (index * 3 > (1 << hash_table->table_capacity) * 2) {
-        hash_table->table_capacity++;
-        free(hash_table->indexes);
-
-        // Fill the hash table with the sentinel
-        hash_table->indexes = malloc(sizeof(uint32_t) * (1 << hash_table->table_capacity));
-        for (uint32_t i = 0; i < (1 << table_capacity); i++) {
-            hash_table->indexes[i] = -1;
-        }
-
-        // Reinsert all the locations of elements into the hash table
-        uint32_t mask = 0xffffffffu >> (32 - hash_table->table_capacity);
-        for (uint32_t i = 0; i < hash_table->count; i++) {
-            uint32_t hash_value = murmur3_32(hash_table->n_sparse, key);
-            uint32_t short_hash = hash_value & mask;
-
-            hash_table->indexes[short_hash] = location;
-        }
-    }
-
-    if (index >= hash_table->entries_capacity) {
-        uint32_t entries_capacity = max(hash_table->entries_capacity * 2, index)
-        hash_table->entries_capacity = entries_capacity;
-        hash_table->keys = realloc(hash_table->keys, sizeof(uint32_t) * hash_table->n_sparse * entries_capacity);
-        hash_table->values = realloc(hash_table->values, sizeof(double) * hash_table->bucket_size * entries_capacity);
-    }
-}
-
-double[] hash_get_bucket(hash_table_t *hash_table, uint32_t *key) {
-    uint32_t hash_value = murmur3_32(hash_table->n_sparse, key);
-    uint32_t mask = 0xffffffffu >> (32 - hash_table->table_capacity);
-    uint32_t short_hash = hash_value & mask;
-
-    for (;;) {
-        if (hash_table->indexes[short_hash] == -1) {
-            // Empty location found. Store the key and initialize the bucket.
-            uint32_t location = hash_table->count;
-
-            // Allocate more space, if needed
-            hash_realloc(hash_table*, location);
-
-            hash_table->indexes[short_hash] = location;
-
-            for (uint32_t i = 0; i < hash_table->n_sparse; i++) {
-                hash_table->keys[hash_table->n_sparse * location + i] = key[i];
-            }
-
-            double[] bucket = hash_table->values + location * hash_table->bucket_size;
-            for (uint32_t i = 0; i < bucket_size; i++) {
-                bucket[i] = 0.0;
-            }
-
-            hash_table->count++;
-            return bucket;
-        } else {
-            // Location is occupied
-            uint32_t location = hash_table->indexes[short_hash];
-
-            for (uint32_t i = 0; i < hash_table->n_sparse; i++) {
-                if (hash_table->keys[hash_table->n_sparse * count + i] != key[i]) {
-                    // Location was filled with different key. Increment (mod table capacity) and continue.
-                    short_hash = (short_hash + 1) & mask;
-                    continue;
-                }
-            }
-            // Location was filled with this key already. Return the bucket.
-            return hash_table->values + location * hash_table->bucket_size
-        }
-    }
-}
-
-void hash_destruct(hash_table_t *hash_table) {
-    free(hash_table->keys);
-    free(hash_table->values);
-}
-
-int hash_comparator(uint32_t *left, uint32_t *right, hash_table_t *hash_table) {
-    // Keys cannot be equal so that case can be ignored
-
-    left_key = hash_table->keys[*left * hash_table->n_sparse];
-    right_key = hash_table->keys[*right * hash_table->n_sparse];
-    for (uint32_t i = 0; i < hash_table->n_sparse) {
-        if (left_key[i] > right_key[i]) {
-            return 1;
-        }
-    }
-    return -1;
-}
diff --git a/tests/codegen/test_ast_to_c.py b/tests/codegen/test_ast_to_c.py
index 5bd493f..89b6305 100644
--- a/tests/codegen/test_ast_to_c.py
+++ b/tests/codegen/test_ast_to_c.py
@@ -90,7 +90,6 @@ def clean(string: str) -> str:
     (Declaration(Variable("x"), integer), "int32_t x"),
     (Declaration(Variable("x"), float), "double x"),
     (Declaration(Variable("x"), tensor), "taco_tensor_t x"),
-    (Declaration(Variable("x"), hash_table), "hash_table_t x"),
     (Declaration(Variable("x"), Pointer(float)), "double* restrict x"),
     (Declaration(Variable("x"), Pointer(Pointer(integer))), "int32_t* restrict* restrict x"),
     (Declaration(Variable("x"), Array(float)), "double x[]"),
diff --git a/tests/csr_matmul_hash.c b/tests/csr_matmul_hash.c
deleted file mode 100644
index a6c3f2b..0000000
--- a/tests/csr_matmul_hash.c
+++ /dev/null
@@ -1,69 +0,0 @@
-int evaluate(taco_tensor_t *a, taco_tensor_t *b, taco_tensor_t *c) {
-  int32_t i_dim = a->dimensions[0];
-  int32_t k_dim = a->dimensions[1];
-  int32_t j_dim = b->dimensions[1];
-  int32_t* restrict a_1_pos = (int32_t*)(a->indices[1][0]);
-  int32_t* restrict a_1_crd = (int32_t*)(a->indices[1][1]);
-  double* restrict a_vals = (double*)(a->vals);
-  int32_t* restrict b_1_pos = (int32_t*)(b->indices[1][0]);
-  int32_t* restrict b_1_crd = (int32_t*)(b->indices[1][1]);
-  double* restrict b_vals = (double*)(b->vals);
-  int32_t* restrict c_1_pos = (int32_t*)(c->indices[1][0]);
-  int32_t* restrict c_1_crd = (int32_t*)(c->indices[1][1]);
-  double* restrict c_vals = (double*)(c->vals);
-
-  a_1_pos = (int32_t*)malloc(sizeof(int32_t) * (a->dimensions[0] + 1));
-  a_1_pos[0] = 0;
-  int32_t a_1_crd_capacity = 1048576;
-  a_1_crd = (int32_t*)malloc(sizeof(int32_t) * a_1_crd_capacity
-  int32_t p_a_0_1 = 0;
-  int32_t a_vals_capacity = 1048576;
-  a_vals = (double*)malloc(sizeof(double) * a_vals_capacity);
-
-  for (int32_t i = 0; i < i_dim; i++) {
-    int32_t p_b_0_0 = i;
-
-    hash_table_t hash_table = hash_construct();
-
-    for (int32_t p_b_0_1 = b_1_pos[p_b_0_0]; p_b_0_1 < b_1_pos[p_b_0_0+1]; p_b_0_1++) {
-      int32_t i_b_0_1 = b_1_crd[p_b_0_1];
-      int32_t j = i_b_0_1;
-      int32_t p_c_0_0 = j;
-
-      for (int32_t p_c_0_1 = c_1_pos[p_c_0_0]; p_c_0_1 < c_1_pos[p_c_0_0+1]; p_c_0_1++) {
-        int32_t i_c_0_1 = c_1_crd[p_c_0_1];
-        int32_t k = i_c_0_1;
-
-        // Once the last sparse index is known, find the item in the hash table, possibly allocating it
-        // This bucket has enough space to store the remaining dense dimensions
-        double[] bucket = hash_insert(&hash_table, {k});
-
-        // Write the dense elements into that bucket
-        bucket[0] = (b_vals[p_b_0_1] * c_vals[p_c_0_1]);
-      }
-    }
-
-    uint32_t[] hash_table_order = malloc(sizeof(uint32_t) * hash_table->count);
-    for (uint32_t i = 0; i < hash_table->count; i++) {
-        hash_table_order[i] = i;
-    }
-
-    qsort_r(hash_table_order, hash_table->count, sizeof(uint32_t), hash_comparator, &hash_table);
-
-    for (uint32_t i_order = 0; i_order < hash_table->count; i++) {
-        a_1_crd[p_a_0_1] = hash_table->keys[i_order * hash_table->n_sparse + 0];
-        for (uint32_t i_bucket = 0; i_bucket < hash_table->bucket_size; i_bucket++) {
-            a_vals[p_a_0_1 + i_bucket] = hash_table->values[i_order * hash_table->bucket_size + i_bucket];
-        }
-        p_a_0_1 = p_a_0_1 + bucket_size;
-    }
-
-    hash_reset(&hash_table);
-  }
-
-  a->indices[1][0] = (unit8_t*)a_1_pos;
-  a->indices[1][1] = (unit8_t*)a_1_crd;
-  a->vals = (uint8_t*)a_vals;
-
-  return 0;
-}
\ No newline at end of file