diff --git a/apps/shark_studio/api/llm.py b/apps/shark_studio/api/llm.py index acd9abf26c..9e92e58cb5 100644 --- a/apps/shark_studio/api/llm.py +++ b/apps/shark_studio/api/llm.py @@ -6,20 +6,24 @@ import torch llm_model_map = { - "llama2_7b": {"initializer":stateless_llama.export_transformer_model, - "hf_model_name":"meta-llama/Llama-2-7b-chat-hf", - "stop_token":2, - "max_tokens":4096, - } - + "llama2_7b": { + "initializer": stateless_llama.export_transformer_model, + "hf_model_name": "meta-llama/Llama-2-7b-chat-hf", + "stop_token": 2, + "max_tokens": 4096, + } } -class LanguageModel(): - def __init__(self, model_name, hf_auth_token=None, device=None, precision="fp32"): +class LanguageModel: + def __init__( + self, model_name, hf_auth_token=None, device=None, precision="fp32" + ): print(llm_model_map[model_name]) self.hf_model_name = llm_model_map[model_name]["hf_model_name"] - self.torch_ir, self.tokenizer = llm_model_map[model_name]["initializer"](self.hf_model_name, hf_auth_token, compile_to="torch") + self.torch_ir, self.tokenizer = llm_model_map[model_name][ + "initializer" + ](self.hf_model_name, hf_auth_token, compile_to="torch") self.tempfile_name = get_resource_path("llm.torch.tempfile") with open(self.tempfile_name, "w+") as f: f.write(self.torch_ir) @@ -33,20 +37,35 @@ def __init__(self, model_name, hf_auth_token=None, device=None, precision="fp32" self.compile() def compile(self) -> None: - #this comes with keys: "vmfb", "config", and "temp_file_to_unlink". - self.iree_module_dict = get_iree_compiled_module(self.tempfile_name, device=self.device, frontend="torch") - #TODO: delete the temp file + # this comes with keys: "vmfb", "config", and "temp_file_to_unlink". + self.iree_module_dict = get_iree_compiled_module( + self.tempfile_name, device=self.device, frontend="torch" + ) + # TODO: delete the temp file def chat(self, prompt): - history = [] for iter in range(self.max_tokens): - input_tensor = self.tokenizer(prompt, return_tensors="pt").input_ids - device_inputs = [ireert.asdevicearray(self.iree_module_dict["config"], input_tensor)] + input_tensor = self.tokenizer( + prompt, return_tensors="pt" + ).input_ids + device_inputs = [ + ireert.asdevicearray( + self.iree_module_dict["config"], input_tensor + ) + ] if iter == 0: - token = torch.tensor(self.iree_module_dict["vmfb"]["run_initialize"](*device_inputs).to_host()[0][0]) + token = torch.tensor( + self.iree_module_dict["vmfb"]["run_initialize"]( + *device_inputs + ).to_host()[0][0] + ) else: - token = torch.tensor(self.iree_module_dict["vmfb"]["run_forward"](*device_inputs).to_host()[0][0]) + token = torch.tensor( + self.iree_module_dict["vmfb"]["run_forward"]( + *device_inputs + ).to_host()[0][0] + ) history.append(token) yield self.tokenizer.decode(history) @@ -61,12 +80,12 @@ def chat(self, prompt): yield result_output - - - - if __name__ == "__main__": - lm = LanguageModel("llama2_7b", hf_auth_token="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", device="cpu-task") + lm = LanguageModel( + "llama2_7b", + hf_auth_token="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", + device="cpu-task", + ) print("model loaded") for i in lm.chat("Hello, I am a robot."): - print(i) \ No newline at end of file + print(i) diff --git a/apps/shark_studio/api/utils.py b/apps/shark_studio/api/utils.py index 18e38b44fd..bb5e150364 100644 --- a/apps/shark_studio/api/utils.py +++ b/apps/shark_studio/api/utils.py @@ -5,9 +5,10 @@ def get_available_devices(): return ["cpu-task"] + def get_resource_path(relative_path): """Get absolute path to resource, works for dev and for PyInstaller""" base_path = getattr( sys, "_MEIPASS", os.path.dirname(os.path.abspath(__file__)) ) - return os.path.join(base_path, relative_path) \ No newline at end of file + return os.path.join(base_path, relative_path) diff --git a/apps/shark_studio/web/index.py b/apps/shark_studio/web/index.py index 70266ee144..59b66bee23 100644 --- a/apps/shark_studio/web/index.py +++ b/apps/shark_studio/web/index.py @@ -9,9 +9,9 @@ # import before IREE to avoid MLIR library issues import torch_mlir -#import PIL, transformers, sentencepiece # ensures inclusion in pysintaller exe generation -#from apps.stable_diffusion.src import args, clear_all -#import apps.stable_diffusion.web.utils.global_obj as global_obj +# import PIL, transformers, sentencepiece # ensures inclusion in pysintaller exe generation +# from apps.stable_diffusion.src import args, clear_all +# import apps.stable_diffusion.web.utils.global_obj as global_obj def launch_app(address): @@ -35,61 +35,61 @@ def launch_app(address): if __name__ == "__main__": - #if args.debug: + # if args.debug: logging.basicConfig(level=logging.DEBUG) # required to do multiprocessing in a pyinstaller freeze freeze_support() -# if args.api or "api" in args.ui.split(","): -# from apps.stable_diffusion.web.ui import ( -# txt2img_api, -# img2img_api, -# upscaler_api, -# inpaint_api, -# outpaint_api, -# llm_chat_api, -# ) -# -# from fastapi import FastAPI, APIRouter -# import uvicorn -# -# # init global sd pipeline and config -# global_obj._init() -# -# app = FastAPI() -# app.add_api_route("/sdapi/v1/txt2img", txt2img_api, methods=["post"]) -# app.add_api_route("/sdapi/v1/img2img", img2img_api, methods=["post"]) -# app.add_api_route("/sdapi/v1/inpaint", inpaint_api, methods=["post"]) -# app.add_api_route("/sdapi/v1/outpaint", outpaint_api, methods=["post"]) -# app.add_api_route("/sdapi/v1/upscaler", upscaler_api, methods=["post"]) -# -# # chat APIs needed for compatibility with multiple extensions using OpenAI API -# app.add_api_route( -# "/v1/chat/completions", llm_chat_api, methods=["post"] -# ) -# app.add_api_route("/v1/completions", llm_chat_api, methods=["post"]) -# app.add_api_route("/chat/completions", llm_chat_api, methods=["post"]) -# app.add_api_route("/completions", llm_chat_api, methods=["post"]) -# app.add_api_route( -# "/v1/engines/codegen/completions", llm_chat_api, methods=["post"] -# ) -# app.include_router(APIRouter()) -# uvicorn.run(app, host="0.0.0.0", port=args.server_port) -# sys.exit(0) -# + # if args.api or "api" in args.ui.split(","): + # from apps.stable_diffusion.web.ui import ( + # txt2img_api, + # img2img_api, + # upscaler_api, + # inpaint_api, + # outpaint_api, + # llm_chat_api, + # ) + # + # from fastapi import FastAPI, APIRouter + # import uvicorn + # + # # init global sd pipeline and config + # global_obj._init() + # + # app = FastAPI() + # app.add_api_route("/sdapi/v1/txt2img", txt2img_api, methods=["post"]) + # app.add_api_route("/sdapi/v1/img2img", img2img_api, methods=["post"]) + # app.add_api_route("/sdapi/v1/inpaint", inpaint_api, methods=["post"]) + # app.add_api_route("/sdapi/v1/outpaint", outpaint_api, methods=["post"]) + # app.add_api_route("/sdapi/v1/upscaler", upscaler_api, methods=["post"]) + # + # # chat APIs needed for compatibility with multiple extensions using OpenAI API + # app.add_api_route( + # "/v1/chat/completions", llm_chat_api, methods=["post"] + # ) + # app.add_api_route("/v1/completions", llm_chat_api, methods=["post"]) + # app.add_api_route("/chat/completions", llm_chat_api, methods=["post"]) + # app.add_api_route("/completions", llm_chat_api, methods=["post"]) + # app.add_api_route( + # "/v1/engines/codegen/completions", llm_chat_api, methods=["post"] + # ) + # app.include_router(APIRouter()) + # uvicorn.run(app, host="0.0.0.0", port=args.server_port) + # sys.exit(0) + # # Setup to use shark_tmp for gradio's temporary image files and clear any # existing temporary images there if they exist. Then we can import gradio. # It has to be in this order or gradio ignores what we've set up. - #from apps.stable_diffusion.web.utils.gradio_configs import ( + # from apps.stable_diffusion.web.utils.gradio_configs import ( # config_gradio_tmp_imgs_folder, - #) + # ) - #config_gradio_tmp_imgs_folder() + # config_gradio_tmp_imgs_folder() import gradio as gr # Create custom models folders if they don't exist - #from apps.stable_diffusion.web.ui.utils import create_custom_models_folders + # from apps.stable_diffusion.web.ui.utils import create_custom_models_folders - #create_custom_models_folders() + # create_custom_models_folders() def resource_path(relative_path): """Get absolute path to resource, works for dev and for PyInstaller""" @@ -100,74 +100,74 @@ def resource_path(relative_path): dark_theme = resource_path("ui/css/sd_dark_theme.css") - #from apps.stable_diffusion.web.ui import ( - #txt2img_web, - #txt2img_custom_model, - #txt2img_gallery, - #txt2img_png_info_img, - #txt2img_status, - #txt2img_sendto_img2img, - #txt2img_sendto_inpaint, - #txt2img_sendto_outpaint, - #txt2img_sendto_upscaler, - ## h2ogpt_upload, - ## h2ogpt_web, - #img2img_web, - #img2img_custom_model, - #img2img_gallery, - #img2img_init_image, - #img2img_status, - #img2img_sendto_inpaint, - #img2img_sendto_outpaint, - #img2img_sendto_upscaler, - #inpaint_web, - #inpaint_custom_model, - #inpaint_gallery, - #inpaint_init_image, - #inpaint_status, - #inpaint_sendto_img2img, - #inpaint_sendto_outpaint, - #inpaint_sendto_upscaler, - #outpaint_web, - #outpaint_custom_model, - #outpaint_gallery, - #outpaint_init_image, - #outpaint_status, - #outpaint_sendto_img2img, - #outpaint_sendto_inpaint, - #outpaint_sendto_upscaler, - #upscaler_web, - #upscaler_custom_model, - #upscaler_gallery, - #upscaler_init_image, - #upscaler_status, - #upscaler_sendto_img2img, - #upscaler_sendto_inpaint, - #upscaler_sendto_outpaint, - ## lora_train_web, - ## model_web, - ## model_config_web, - #hf_models, - #modelmanager_sendto_txt2img, - #modelmanager_sendto_img2img, - #modelmanager_sendto_inpaint, - #modelmanager_sendto_outpaint, - #modelmanager_sendto_upscaler, - #stablelm_chat, - #minigpt4_web, - #outputgallery_web, - #outputgallery_tab_select, - #outputgallery_watch, - #outputgallery_filename, - #outputgallery_sendto_txt2img, - #outputgallery_sendto_img2img, - #outputgallery_sendto_inpaint, - #outputgallery_sendto_outpaint, - #outputgallery_sendto_upscaler, - #) + # from apps.stable_diffusion.web.ui import ( + # txt2img_web, + # txt2img_custom_model, + # txt2img_gallery, + # txt2img_png_info_img, + # txt2img_status, + # txt2img_sendto_img2img, + # txt2img_sendto_inpaint, + # txt2img_sendto_outpaint, + # txt2img_sendto_upscaler, + ## h2ogpt_upload, + ## h2ogpt_web, + # img2img_web, + # img2img_custom_model, + # img2img_gallery, + # img2img_init_image, + # img2img_status, + # img2img_sendto_inpaint, + # img2img_sendto_outpaint, + # img2img_sendto_upscaler, + # inpaint_web, + # inpaint_custom_model, + # inpaint_gallery, + # inpaint_init_image, + # inpaint_status, + # inpaint_sendto_img2img, + # inpaint_sendto_outpaint, + # inpaint_sendto_upscaler, + # outpaint_web, + # outpaint_custom_model, + # outpaint_gallery, + # outpaint_init_image, + # outpaint_status, + # outpaint_sendto_img2img, + # outpaint_sendto_inpaint, + # outpaint_sendto_upscaler, + # upscaler_web, + # upscaler_custom_model, + # upscaler_gallery, + # upscaler_init_image, + # upscaler_status, + # upscaler_sendto_img2img, + # upscaler_sendto_inpaint, + # upscaler_sendto_outpaint, + ## lora_train_web, + ## model_web, + ## model_config_web, + # hf_models, + # modelmanager_sendto_txt2img, + # modelmanager_sendto_img2img, + # modelmanager_sendto_inpaint, + # modelmanager_sendto_outpaint, + # modelmanager_sendto_upscaler, + # stablelm_chat, + # minigpt4_web, + # outputgallery_web, + # outputgallery_tab_select, + # outputgallery_watch, + # outputgallery_filename, + # outputgallery_sendto_txt2img, + # outputgallery_sendto_img2img, + # outputgallery_sendto_inpaint, + # outputgallery_sendto_outpaint, + # outputgallery_sendto_upscaler, + # ) # init global sd pipeline and config - #global_obj._init() + # global_obj._init() def register_button_click(button, selectedid, inputs, outputs): button.click( @@ -213,17 +213,17 @@ def register_outputgallery_button(button, selectedid, inputs, outputs): # destination of one of the 'send to' buttons. If you do have to change # that id, make sure you update the relevant register_button_click calls # further down with the new id. - #with gr.TabItem(label="Text-to-Image", id=0): + # with gr.TabItem(label="Text-to-Image", id=0): # txt2img_web.render() - #with gr.TabItem(label="Image-to-Image", id=1): + # with gr.TabItem(label="Image-to-Image", id=1): # img2img_web.render() - #with gr.TabItem(label="Inpainting", id=2): + # with gr.TabItem(label="Inpainting", id=2): # inpaint_web.render() - #with gr.TabItem(label="Outpainting", id=3): + # with gr.TabItem(label="Outpainting", id=3): # outpaint_web.render() - #with gr.TabItem(label="Upscaler", id=4): + # with gr.TabItem(label="Upscaler", id=4): # upscaler_web.render() - #if args.output_gallery: + # if args.output_gallery: # with gr.TabItem(label="Output Gallery", id=5) as og_tab: # outputgallery_web.render() @@ -248,7 +248,7 @@ def register_outputgallery_button(button, selectedid, inputs, outputs): ## label="Generate Sharding Config (Experimental)", id=9 ## ): ## model_config_web.render() - #with gr.TabItem(label="MultiModal (Experimental)", id=10): + # with gr.TabItem(label="MultiModal (Experimental)", id=10): # minigpt4_web.render() # with gr.TabItem(label="DocuChat Upload", id=11): # h2ogpt_upload.render() @@ -256,103 +256,103 @@ def register_outputgallery_button(button, selectedid, inputs, outputs): # h2ogpt_web.render() # send to buttons - #register_button_click( + # register_button_click( # txt2img_sendto_img2img, # 1, # [txt2img_gallery], # [img2img_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # txt2img_sendto_inpaint, # 2, # [txt2img_gallery], # [inpaint_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # txt2img_sendto_outpaint, # 3, # [txt2img_gallery], # [outpaint_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # txt2img_sendto_upscaler, # 4, # [txt2img_gallery], # [upscaler_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # img2img_sendto_inpaint, # 2, # [img2img_gallery], # [inpaint_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # img2img_sendto_outpaint, # 3, # [img2img_gallery], # [outpaint_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # img2img_sendto_upscaler, # 4, # [img2img_gallery], # [upscaler_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # inpaint_sendto_img2img, # 1, # [inpaint_gallery], # [img2img_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # inpaint_sendto_outpaint, # 3, # [inpaint_gallery], # [outpaint_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # inpaint_sendto_upscaler, # 4, # [inpaint_gallery], # [upscaler_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # outpaint_sendto_img2img, # 1, # [outpaint_gallery], # [img2img_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # outpaint_sendto_inpaint, # 2, # [outpaint_gallery], # [inpaint_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # outpaint_sendto_upscaler, # 4, # [outpaint_gallery], # [upscaler_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # upscaler_sendto_img2img, # 1, # [upscaler_gallery], # [img2img_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # upscaler_sendto_inpaint, # 2, # [upscaler_gallery], # [inpaint_init_image, tabs], - #) - #register_button_click( + # ) + # register_button_click( # upscaler_sendto_outpaint, # 3, # [upscaler_gallery], # [outpaint_init_image, tabs], - #) - #if args.output_gallery: + # ) + # if args.output_gallery: # register_outputgallery_button( # outputgallery_sendto_txt2img, # 0, @@ -383,39 +383,39 @@ def register_outputgallery_button(button, selectedid, inputs, outputs): # [outputgallery_filename], # [upscaler_init_image, tabs], # ) - #register_modelmanager_button( + # register_modelmanager_button( # modelmanager_sendto_txt2img, # 0, # [hf_models], # [txt2img_custom_model, tabs], - #) - #register_modelmanager_button( + # ) + # register_modelmanager_button( # modelmanager_sendto_img2img, # 1, # [hf_models], # [img2img_custom_model, tabs], - #) - #register_modelmanager_button( + # ) + # register_modelmanager_button( # modelmanager_sendto_inpaint, # 2, # [hf_models], # [inpaint_custom_model, tabs], - #) - #register_modelmanager_button( + # ) + # register_modelmanager_button( # modelmanager_sendto_outpaint, # 3, # [hf_models], # [outpaint_custom_model, tabs], - #) - #register_modelmanager_button( + # ) + # register_modelmanager_button( # modelmanager_sendto_upscaler, # 4, # [hf_models], # [upscaler_custom_model, tabs], - #) + # ) sd_web.queue() - #if args.ui == "app": + # if args.ui == "app": # t = Process( # target=launch_app, args=[f"http://localhost:{args.server_port}"] # ) @@ -424,5 +424,5 @@ def register_outputgallery_button(button, selectedid, inputs, outputs): share=True, inbrowser=True, server_name="0.0.0.0", - server_port=11911 #args.server_port, + server_port=11911, # args.server_port, ) diff --git a/apps/shark_studio/web/ui/chat.py b/apps/shark_studio/web/ui/chat.py index 02b06a019e..0ecafb5c85 100644 --- a/apps/shark_studio/web/ui/chat.py +++ b/apps/shark_studio/web/ui/chat.py @@ -10,7 +10,6 @@ from apps.shark_studio.api.llm import ( llm_model_map, LanguageModel, - ) @@ -62,7 +61,6 @@ def user(message, history): } - def create_prompt(model_name, history, prompt_prefix): return "" system_message = "" @@ -97,7 +95,6 @@ def create_prompt(model_name, history, prompt_prefix): return msg - def get_default_config(): return False import torch @@ -124,7 +121,7 @@ def get_default_config(): c.split_into_layers() -#model_vmfb_key = "" +# model_vmfb_key = "" def chat_fn( @@ -399,9 +396,7 @@ def view_json_file(file_obj): with gr.Blocks(title="Chat") as chat_element: with gr.Row(): - model_choices = list( - llm_model_map.keys() - ) + model_choices = list(llm_model_map.keys()) model = gr.Dropdown( label="Select Model", value=model_choices[0], @@ -424,9 +419,9 @@ def view_json_file(file_obj): label="Precision", value="int4", choices=[ - #"int4", - #"int8", - #"fp16", + # "int4", + # "int8", + # "fp16", "fp32", ], visible=False, diff --git a/shark/iree_utils/compile_utils.py b/shark/iree_utils/compile_utils.py index 86419fab4b..b5c4527827 100644 --- a/shark/iree_utils/compile_utils.py +++ b/shark/iree_utils/compile_utils.py @@ -478,7 +478,7 @@ def get_iree_compiled_module( compile_str=compile_str, ) temp_file_to_unlink = None - #TODO: Currently mmap=True control flow path has been switched off for mmap. + # TODO: Currently mmap=True control flow path has been switched off for mmap. # Got to find a cleaner way to unlink/delete the temporary file since # we're setting delete=False when creating NamedTemporaryFile. That's why # I'm getting hold of the name of the temporary file in `temp_file_to_unlink`.