Merge branch 'master' of https://github.com/PrideHacks2023/vina

vina-ai · Jun 18, 2023 · 61537b0 · 61537b0
2 parents 4a28b0b + ef3f26a
commit 61537b0
Show file tree

Hide file tree

Showing 15 changed files with 124 additions and 41 deletions.
diff --git a/README.md b/README.md
@@ -1,13 +1,62 @@
 <div align="center">
 
-# vina
+<img src="media/logo.png" alt="vina" />
 
-Ai generated visual novel
+AI-powered visual novel generator
 
 [![crates.io](https://img.shields.io/crates/v/vina.svg)](https://crates.io/crates/vina)
 [![docs.rs](https://docs.rs/vina/badge.svg)](https://docs.rs/vina)
 [![MIT/Apache 2.0](https://img.shields.io/badge/license-MIT%2FApache-blue.svg)](#)
 
 </div>
 
+**VinA** is a visual novel generator. Once you specify a prompt on the type of
+story you want, we generate an entire plot, detailed characters with
+personalities, locations, music, and more. The result is a fully playable,
+polished visual novel.
+
+## Example
+
+With the following prompt:
+```
+Write a sci-fi story about a hackathon project gone haywire, where two friends are working
+together on a coding project over the weekend. Then, they are sucked into their laptop and
+have to find a way back to reality. They overcome an obstacle and successfully return back home.
+```
+
+We get this visual novel.
+
+## Features
+
+Dynamic facial expressions depending on the dialogue
+<div>
+  <img src="media/lisa_base.png" width="30%" />
+  <img src="media/lisa_cry.png" width="30%" />
+</div>
+<div>
+  <img src="media/alex_base.png" width="30%" />
+  <img src="media/alex_anger.png" width="30%" />
+</div>
+
+Generated background images for each scene
+<div>
+  <img src="media/bg0.png" width="70%" />
+  <img src="media/bg1.png" width="70%" />
+</div>
+
+## Usage
+
+To run **VinA** for yourself, you need the following:
+- An OpenAI API key; find out how to get one [here](https://platform.openai.com/docs/api-reference/authentication)
+- An instance of Automatic1111's Stable Diffusion web UI; ensure the instance you are using has API support. More info [here](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
+- Ren'Py; installation instructions are [here](https://renpy.org/doc/html/quickstart.html)
+
+The following environment variables should be set:
+- `REN_PATH`: path to the Ren'Py executable
+- `OPENAI_KEY`: your OpenAI API key
+- `NOVELAI_URL`: URL of your instance of the Stable Diffusion web UI
+
+## What's with the name?
+
+**VinA** is an anagram of the much less creative name, 'AI VN'.
 
diff --git a/crates/vina_story/src/api.rs b/crates/vina_story/src/api.rs
@@ -101,17 +101,20 @@ impl ApiClient {
 
 /// Parse a function call
 pub fn parse_fncall<T: DeserializeOwned>(msg: &Value) -> anyhow::Result<T> {
+    let fn_args = parse_fncall_raw(msg)?;
+    let downcasted = serde_json::from_value(fn_args)?;
+    Ok(downcasted)
+}
+
+pub fn parse_fncall_raw(msg: &Value) -> anyhow::Result<Value> {
     let fn_call = &msg["function_call"];
     let fn_name = fn_call["name"].as_str().unwrap();
 
     // TODO hardcoded inner key (since most of time we only have one argument)
     let fn_args = fn_call["arguments"].as_str().unwrap();
     let mut fn_args: Value = serde_json::from_str(fn_args).unwrap();
     let fn_args = fn_args["inner"].take();
-
-    let downcasted = serde_json::from_value(fn_args)?;
-
-    Ok(downcasted)
+    Ok(fn_args)
 }
 
 /// Parse text content
@@ -177,7 +180,9 @@ pub fn get_scenes_fn() -> Value {
                                 "description:": "Descriptive title of the scene based on it's contents",
                             },
                             "music": {
-                                "type": "string",
+                                "enum": [
+                                    "Funky", "Calm", "Dark", "Inspirational", "Bright", "Dramatic", "Happy", "Romantic", "Angry", "Sad"
+                                ],
                                 "description": "Genre of music that should be played in this scene",
                             },
                             "location": {
@@ -196,41 +201,48 @@ pub fn get_scenes_fn() -> Value {
                                         "type": "string",
                                         "description": "Landmarks and objects of focus that are present in the scene. Omit any descriptions of people.",
                                     },
-                                    "mood": {
-                                        "type": "string",
-                                        "description": "Information about the mood. Omit any descriptions of people.",
-                                    },
                                     "time_of_day": {
                                         "type": "string",
                                         "description": "What time of day it is",
                                     },
                                 }
                             },
-                            "script": {
-                                "type": "array",
-                                "items": {
-                                    "type": "object",
-                                    "description": "A line in the script, contains information like the speaker, choose a facial expression from this list: smiling, crying, nervous, excited, blushing to match what is being said, and also what is being said",
-                                    "properties": {
-                                        "speaker": {
-                                            "type": "string",
-                                            "description": "Name of the speaker"
-                                        },
-                                        "facial_expression": {
-                                            "type": "string",
-                                            "description": "Use an emotion from this list: smiling, crying, nervous, excited, blushing to match the dialogue spoken"
-                                        },
-                                        "content": {
-                                            "type": "string",
-                                            "description": "What the speaker actually says"
-                                        }
-                                    }
-                                }
+                        }
+                    }
+                },
+            },
+            "required": ["inner"],
+        }
+    })
+}
 
+pub fn get_script_fn() -> Value {
+    json!({
+        "name": "get_script_fn",
+        "description": "Script to be used in scene",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "inner": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "description": "A line in the script, contains information like the speaker, what is being said, and facial expression",
+                        "properties": {
+                            "speaker": {
+                                "type": "string",
+                                "description": "Name of the speaker"
                             },
+                            "facial_expression": {
+                                "type": "string",
+                                "description": "Use an emotion from this list: smiling, crying, nervous, excited, blushing to match the dialogue spoken"
+                            },
+                            "content": {
+                                "type": "string",
+                                "description": "What the speaker says"
+                            }
                         }
                     }
-
                 },
 
             },

diff --git a/crates/vina_story/src/content.rs b/crates/vina_story/src/content.rs
@@ -53,8 +53,6 @@ pub struct Location {
     pub description: String,
     /// Concrete objects and landmarks in the scene
     pub landmarks: String,
-    /// Information on the mood and time of day
-    pub mood: String,
     /// Time of day
     pub time_of_day: String,
 }

diff --git a/crates/vina_story/src/lib.rs b/crates/vina_story/src/lib.rs
@@ -4,7 +4,8 @@ pub mod api;
 pub mod content;
 pub mod music;
 
-use content::Location;
+use content::{Dialogue, Location};
+use serde_json::{json, Value};
 
 use crate::{
     api::*,
@@ -15,22 +16,45 @@ pub fn generate_story(token: &str, prompt: &str) -> anyhow::Result<Game> {
     // Client to generate details of the story
     let mut story_client = ApiClient::new(token);
 
-    story_client.run_prompt(prompt, None).unwrap();
+    let res = story_client.run_prompt(prompt, None).unwrap();
+    let game_name = parse_content(res)?;
 
     story_client
-        .run_prompt("Generate a title for this story", None)
+        .run_prompt("Generate a short game title for this story", None)
         .unwrap();
 
     let res = story_client.run_prompt("Limit the number of characters to a maximum of 3. Give me each of the characters in the story, along with detailed personality, clothing, and physical appearance details (include age, race, gender).", Some(get_characters_fn())).unwrap();
 
     let characters: Vec<Character> = parse_fncall(&res).unwrap();
     // println!("CHARACTERS {:?}", characters);
 
-    let res = story_client.run_prompt("Limit the number of locations to a maximum of 5. Separate the story into multiple scenes, and for each scene give me a long and detailed description of the setting of the scene, omit any descriptions of people, include the name of the location, physical location it takes place in, objects and landmarks in the scene, mood, and time of day. Also create a title each scene that corresponds to the contents of the scene. Furthermore, for each scene, write me a script and return the result in a list with each element as a character's dialogue, and use a facial expression from this list: smiling, crying, nervous, excited, blushing to match the dialogue spoken. Also For each scene, tell me the music genre from this list Funky, Calm, Dark, Inspirational, Bright, Dramatic, Happy, Romantic, Angry, Sad", Some(get_scenes_fn())).unwrap();
+    let res = story_client.run_prompt("Separate the story into multiple scenes, and for each scene give me a long and detailed description of the setting of the scene, omit any descriptions of people, include the name of the location, physical location it takes place in, objects and landmarks in the scene, mood, and time of day. Also create a title each scene that corresponds to the contents of the scene. Furthermore, for each scene, write me a script and return the result in a list with each element as a character's dialogue, and use a facial expression from this list: smiling, crying, nervous, excited, blushing to match the dialogue spoken. Also For each scene, tell me the music genre from this list Funky, Calm, Dark, Inspirational, Bright, Dramatic, Happy, Romantic, Angry, Sad", Some(get_scenes_fn())).unwrap();
 
-    let scenes: Vec<Scene> = parse_fncall(&res).unwrap();
+    let raw_scenes: Value = parse_fncall_raw(&res).unwrap();
     // println!("SCENES {:?}", scenes);
 
+    let mut val_scenes: Vec<Value> = vec![];
+    for (i, raw_scene) in raw_scenes.as_array().unwrap().iter().enumerate() {
+        let scene_number = i + 1;
+
+        let prompt = format!(
+            r#"For scene {scene_number}, write me a script with a lot of speaking. Prioritize number of lines of dialogue. When writing each line of dialogue, take into account the personality and mood of the character as well as the setting. Do not use a narrator. Ensure that the script transitions smoothly into the next scene. Return the result in a list. Also include facial expression from this list: smiling, crying, nervous, excited, blushing to match the dialogue spoken. Output as json."#
+        );
+        let res = story_client
+            .run_prompt(&prompt, Some(get_script_fn()))
+            .unwrap();
+
+        let script: Vec<Dialogue> = parse_fncall(&res).unwrap();
+
+        // construct finished scene
+        let mut obj_scene = raw_scene.as_object().unwrap().clone();
+        obj_scene.insert(String::from("script"), json! {script});
+        val_scenes.push(Value::Object(obj_scene));
+    }
+    // println!("BUILT SCENE {val_scenes:?}");
+
+    let scenes: Vec<Scene> = serde_json::from_value(Value::Array(val_scenes)).unwrap();
+
     let game = Game {
         name: String::from("VinaGame"),
         synopsis: String::new(),
@@ -51,8 +75,8 @@ pub fn generate_location_prompt(token: &str, location: &Location) -> anyhow::Res
     generate_prompt(
         token,
         &format!(
-            "{}. {}. {}. {}",
-            location.description, location.landmarks, location.mood, location.time_of_day
+            "{}. {}. {}",
+            location.description, location.landmarks, location.time_of_day
         ),
     )
 }

diff --git a/media/alex_anger.png b/media/alex_anger.png
diff --git a/media/alex_base.png b/media/alex_base.png
diff --git a/media/bg0.png b/media/bg0.png
diff --git a/media/bg1.png b/media/bg1.png
diff --git a/media/hackathon_bg0.png b/media/hackathon_bg0.png
diff --git a/media/hackathon_bg1.png b/media/hackathon_bg1.png
diff --git a/media/hackathon_jessie.png b/media/hackathon_jessie.png
diff --git a/media/hackathon_peyton.png b/media/hackathon_peyton.png
diff --git a/media/lisa_base.png b/media/lisa_base.png
diff --git a/media/lisa_cry.png b/media/lisa_cry.png
diff --git a/media/logo.png b/media/logo.png