add starcoder/wizardcoder/santacoder support

ravenscroftj · Jul 29, 2023 · dfa4b5e · dfa4b5e
1 parent 7eb10b2
commit dfa4b5e
Show file tree

Hide file tree

Showing 7 changed files with 864 additions and 0 deletions.
diff --git a/include/turbopilot/model.hpp b/include/turbopilot/model.hpp
@@ -8,6 +8,9 @@
 #include <vector>
 #include <random>
 
+typedef void (*offload_func_t)(struct ggml_tensor * tensor); // pointer to a function that takes one ggml_tensor* and returns nothing
+void ggml_nop(struct ggml_tensor * tensor); // NOTE(review): the no-op defined in src/common.cpp in this change is named llama_nop, not ggml_nop - unless ggml_nop is defined elsewhere, calls to it will not link; confirm the intended name
+
 struct gpt_vocab
 {
     using id = int32_t;

diff --git a/include/turbopilot/starcoder.hpp b/include/turbopilot/starcoder.hpp
@@ -0,0 +1,79 @@
+#ifndef __TURBOPILOT_STARCODER_H
+#define __TURBOPILOT_STARCODER_H
+
+#include <turbopilot/model.hpp>
+
+// default hparams; NOTE(review): these are not the GPT-2 117M values the old label claimed - presumably they mirror the santacoder config linked on the next line, confirm
+// https://huggingface.co/bigcode/gpt_bigcode-santacoder/blob/main/config.json
+struct starcoder_hparams {
+    int32_t n_vocab = 49280; // presumably the vocabulary size - TODO confirm against the loader
+    int32_t n_ctx   = 2048;  // presumably the maximum context length in tokens - TODO confirm
+    int32_t n_embd  = 2048;  // presumably the embedding / hidden width - TODO confirm
+    int32_t n_head  = 16;    // presumably the number of attention heads - TODO confirm
+    int32_t n_layer = 24;    // presumably the number of transformer layers - TODO confirm
+    int32_t ftype   = 1;     // presumably the ggml weight/file type id - TODO confirm what 1 maps to
+};
+
+struct starcoder_layer { // tensor handles for one layer; starcoder_model keeps a std::vector of these
+    // normalization: ln_1_* and ln_2_* (presumably _g = gain/scale and _b = bias - confirm in the loader)
+    struct ggml_tensor * ln_1_g;
+    struct ggml_tensor * ln_1_b;
+
+    struct ggml_tensor * ln_2_g;
+    struct ggml_tensor * ln_2_b;
+
+    // attention: c_attn_attn_* and c_attn_proj_* (presumably _w = weight and _b = bias - confirm)
+    struct ggml_tensor * c_attn_attn_w;
+    struct ggml_tensor * c_attn_attn_b;
+
+    struct ggml_tensor * c_attn_proj_w;
+    struct ggml_tensor * c_attn_proj_b;
+
+    // mlp: c_mlp_fc_* and c_mlp_proj_* (same _w/_b suffix convention assumed)
+    struct ggml_tensor * c_mlp_fc_w;
+    struct ggml_tensor * c_mlp_fc_b;
+
+    struct ggml_tensor * c_mlp_proj_w;
+    struct ggml_tensor * c_mlp_proj_b;
+};
+
+struct starcoder_model { // groups the hyperparameters, tensor handles and ggml context of one starcoder model
+    starcoder_hparams hparams;
+
+    // normalization (ln_f_*: presumably the final layer norm applied after the last layer - confirm)
+    struct ggml_tensor * ln_f_g;
+    struct ggml_tensor * ln_f_b;
+
+    struct ggml_tensor * wte;     //    token embedding
+    struct ggml_tensor * wpe;     // position embedding
+    struct ggml_tensor * lm_head; // language model head
+
+    std::vector<starcoder_layer> layers;
+
+    // key + value memory
+    struct ggml_tensor * memory_k;
+    struct ggml_tensor * memory_v;
+
+    // ggml context handle and a name -> tensor map (NOTE(review): presumably filled by the loader, which is not in this header)
+    struct ggml_context * ctx;
+    std::map<std::string, struct ggml_tensor *> tensors;
+};
+
+
+class StarcoderModel : public TurbopilotModel { // TurbopilotModel subclass backed by a starcoder_model and a gpt_vocab
+public:
+    StarcoderModel(ModelConfig config, std::mt19937 &rng) : TurbopilotModel(config, rng){ // forwards config and rng to the base constructor
+        this->model = new starcoder_model{}; // allocated with new, so it must be released with delete, never free()
+        this->vocab = new gpt_vocab{}; // same allocation rule as model
+    }
+    virtual ~StarcoderModel(); // defined out of line; NOTE(review): expected to release model and vocab - confirm in starcoder.cpp
+    bool load_model(std::string path); // NOTE(review): presumably loads weights and vocab from path and returns true on success - confirm
+    virtual std::stringstream predict(std::string prompt, int max_length, bool include_prompt); // NOTE(review): no override specifier - confirm the signature matches the base class
+
+private:
+    starcoder_model *model = NULL; // set in the constructor; NOTE(review): unless TurbopilotModel forbids copying, an implicit copy would share this pointer and vocab
+    gpt_vocab *vocab = NULL; // set in the constructor
+};
+
+
+#endif //__TURBOPILOT_STARCODER_H
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -9,8 +9,10 @@ add_executable(${TURBOPILOT_TARGET}
   gptj.cpp
   common.cpp
   server.cpp
+  starcoder.cpp
   ../include/turbopilot/model.hpp
   ../include/turbopilot/gptj.hpp
+  ../include/turbopilot/starcoder.hpp
   )
 
 

diff --git a/src/common.cpp b/src/common.cpp
@@ -4,11 +4,15 @@
 #include <cmath>
 #include <random>
 
+void llama_nop(struct ggml_tensor * tensor) { // don't offload by default; NOTE(review): model.hpp in this change declares ggml_nop, not llama_nop - confirm the intended name
+    (void) tensor; // cast to void so the parameter counts as used; the function does nothing else
+}
 
 void gpt_vocab::add_special_token(const std::string & token) {
     special_tokens.push_back(token);
 }
 
+
 void gpt_split_words(std::string str, std::vector<std::string>& words) {
     const std::string pattern = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
     const std::regex re(pattern);

diff --git a/src/gptj.cpp b/src/gptj.cpp
@@ -249,6 +249,7 @@ bool gptj_eval(
 GPTJModel::~GPTJModel(){ // releases the ggml context, then the model and vocab objects
     ggml_free(model->ctx); // reads ctx through model, so it has to run before model is released
     free(model); // NOTE(review): free() is only valid for malloc-family allocations and runs no destructors; StarcoderModel in this change allocates with new - if GPTJModel does too, this must be delete
+    free(vocab); // NOTE(review): same concern as model - confirm how vocab is allocated
 }
 
 bool GPTJModel::load_model(std::string fname) {

diff --git a/src/main.cpp b/src/main.cpp
@@ -9,6 +9,7 @@
 #include <argparse/argparse.hpp>
 
 #include "turbopilot/model.hpp"
+#include "turbopilot/starcoder.hpp"
 #include "turbopilot/gptj.hpp"
 #include "turbopilot/server.hpp"
 
@@ -64,6 +65,9 @@ int main(int argc, char **argv)
     if(model_type.compare("codegen") == 0) {
         spdlog::info("Initializing GPT-J type model for '{}' model", model_type);
         model = new GPTJModel(config, rng);
+    }else if(model_type.compare("starcoder") == 0 || model_type.compare("wizardcoder") == 0){
+        spdlog::info("Initializing Starcoder/Wizardcoder type model for '{}' model type", model_type);
+        model = new StarcoderModel(config, rng);
     }else{
         spdlog::error("Invalid model type: {}", model_type);
     }