Skip to content
This repository has been archived by the owner on Sep 30, 2023. It is now read-only.

Commit

Permalink
add starcoder/wizardcoder/santacoder support
Browse files Browse the repository at this point in the history
  • Loading branch information
ravenscroftj committed Jul 29, 2023
1 parent 7eb10b2 commit dfa4b5e
Show file tree
Hide file tree
Showing 7 changed files with 864 additions and 0 deletions.
3 changes: 3 additions & 0 deletions include/turbopilot/model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#include <vector>
#include <random>

// Function-pointer type for a per-tensor hook: takes a ggml tensor pointer
// and returns nothing.
using offload_func_t = void (*)(struct ggml_tensor * tensor);

// Declaration only; the definition is not visible in this header.
// NOTE(review): src/common.cpp in this change defines `llama_nop` with the
// same signature, not `ggml_nop` — confirm that a definition of `ggml_nop`
// exists somewhere, otherwise any use of it will fail to link.
void ggml_nop(struct ggml_tensor * tensor);

struct gpt_vocab
{
using id = int32_t;
Expand Down
79 changes: 79 additions & 0 deletions include/turbopilot/starcoder.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#ifndef __TURBOPILOT_STARCODER_H
#define __TURBOPILOT_STARCODER_H

#include <turbopilot/model.hpp>

// Default hyperparameters for a StarCoder-family model.
// The defaults appear to follow the SantaCoder configuration linked below
// (they are not the GPT-2 117M values) — TODO confirm against the config:
// https://huggingface.co/bigcode/gpt_bigcode-santacoder/blob/main/config.json
struct starcoder_hparams {
    int32_t n_vocab{49280};  // vocabulary size
    int32_t n_ctx{2048};     // context length
    int32_t n_embd{2048};    // embedding width
    int32_t n_head{16};      // number of attention heads
    int32_t n_layer{24};     // number of layers
    int32_t ftype{1};        // file/weight type code; its meaning is defined by the loader (not visible here)
};

// Pointers to the ggml tensors that make up a single layer of the model.
// Nothing in this struct allocates or frees the tensors.
// NOTE(review): the `_g`/`_b` and `_w`/`_b` suffixes presumably mean
// gain/bias and weight/bias respectively — confirm against the loader.
struct starcoder_layer {
// normalization: two separate parameter pairs (ln_1_*, ln_2_*)
struct ggml_tensor * ln_1_g;
struct ggml_tensor * ln_1_b;

struct ggml_tensor * ln_2_g;
struct ggml_tensor * ln_2_b;

// attention: combined attention tensor pair, then the output projection pair
struct ggml_tensor * c_attn_attn_w;
struct ggml_tensor * c_attn_attn_b;

struct ggml_tensor * c_attn_proj_w;
struct ggml_tensor * c_attn_proj_b;

// mlp: fully-connected pair, then the projection pair
struct ggml_tensor * c_mlp_fc_w;
struct ggml_tensor * c_mlp_fc_b;

struct ggml_tensor * c_mlp_proj_w;
struct ggml_tensor * c_mlp_proj_b;
};

// Aggregate of everything that describes one loaded model: hyperparameters,
// top-level tensors, per-layer tensors, key/value memory and the ggml context.
struct starcoder_model {
starcoder_hparams hparams;

// final normalization (applied after the layers — presumably; the evaluation code is not visible here)
struct ggml_tensor * ln_f_g;
struct ggml_tensor * ln_f_b;

struct ggml_tensor * wte; // token embedding (GPT-2 naming: "word token embeddings")
struct ggml_tensor * wpe; // position embedding (GPT-2 naming: "word position embeddings")
struct ggml_tensor * lm_head; // language model head

// one entry per layer
std::vector<starcoder_layer> layers;

// key + value memory
struct ggml_tensor * memory_k;
struct ggml_tensor * memory_v;

// ggml context associated with this model, plus a lookup of tensors by name
// (presumably filled in by the loader — not visible here)
struct ggml_context * ctx;
std::map<std::string, struct ggml_tensor *> tensors;
};


class StarcoderModel : public TurbopilotModel {
public:
StarcoderModel(ModelConfig config, std::mt19937 &rng) : TurbopilotModel(config, rng){
this->model = new starcoder_model{};
this->vocab = new gpt_vocab{};
}
virtual ~StarcoderModel();
bool load_model(std::string path);
virtual std::stringstream predict(std::string prompt, int max_length, bool include_prompt);

private:
starcoder_model *model = NULL;
gpt_vocab *vocab = NULL;
};


#endif //__TURBOPILOT_STARCODER_H
2 changes: 2 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ add_executable(${TURBOPILOT_TARGET}
gptj.cpp
common.cpp
server.cpp
starcoder.cpp
../include/turbopilot/model.hpp
../include/turbopilot/gptj.hpp
../include/turbopilot/starcoder.hpp
)


Expand Down
4 changes: 4 additions & 0 deletions src/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
#include <cmath>
#include <random>

// No-op hook with the offload-callback signature: the tensor argument is
// ignored, i.e. nothing is offloaded by default.
// NOTE(review): include/turbopilot/model.hpp declares `ggml_nop`, not
// `llama_nop` — confirm which name callers are expected to link against.
void llama_nop(struct ggml_tensor * /*tensor*/) {
    // intentionally empty
}

void gpt_vocab::add_special_token(const std::string & token) {
special_tokens.push_back(token);
}


void gpt_split_words(std::string str, std::vector<std::string>& words) {
const std::string pattern = R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)";
const std::regex re(pattern);
Expand Down
1 change: 1 addition & 0 deletions src/gptj.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ bool gptj_eval(
// Releases the ggml context and the model/vocabulary objects.
//
// `model` and `vocab` are C++ objects with non-trivial members (gpt_vocab
// holds containers), so they are released with `delete` rather than `free()`:
// `free()` would skip their destructors and does not match `new`.
// NOTE(review): this assumes GPTJModel's constructor allocates both with
// `new`, as the sibling StarcoderModel constructor does — confirm in gptj.hpp.
GPTJModel::~GPTJModel() {
    if (model != nullptr) {
        // The context is only set once a model has been loaded.
        if (model->ctx != nullptr) {
            ggml_free(model->ctx);
        }
        delete model;
    }
    delete vocab;  // deleting a null pointer is a no-op
}

bool GPTJModel::load_model(std::string fname) {
Expand Down
4 changes: 4 additions & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <argparse/argparse.hpp>

#include "turbopilot/model.hpp"
#include "turbopilot/starcoder.hpp"
#include "turbopilot/gptj.hpp"
#include "turbopilot/server.hpp"

Expand Down Expand Up @@ -64,6 +65,9 @@ int main(int argc, char **argv)
if(model_type.compare("codegen") == 0) {
spdlog::info("Initializing GPT-J type model for '{}' model", model_type);
model = new GPTJModel(config, rng);
}else if(model_type.compare("starcoder") == 0 || model_type.compare("wizardcoder") == 0){
spdlog::info("Initializing Starcoder/Wizardcoder type model for '{}' model type", model_type);
model = new StarcoderModel(config, rng);
}else{
spdlog::error("Invalid model type: {}", model_type);
}
Expand Down
Loading

0 comments on commit dfa4b5e

Please sign in to comment.