From d83c8073c697b261335e318d904992ea69a0c672 Mon Sep 17 00:00:00 2001 From: Zijian Zhang Date: Mon, 9 Oct 2023 14:57:39 -0400 Subject: [PATCH] docs: update document --- docs/.vuepress/styles/index.scss | 2 +- docs/README.md | 6 +++ docs/index.md | 2 +- docs/writings/1. Indexing is non-trivial.md | 10 ++-- .../2.1 Method of Loci and sparsity.md | 10 ++-- docs/writings/3. Copernicus and indexing.md | 46 ++++++++++++------- .../4. Continuous and discrete knowledge.md | 10 ++-- evonote/model/chat.py | 24 +++++++--- 8 files changed, 73 insertions(+), 37 deletions(-) create mode 100644 docs/README.md diff --git a/docs/.vuepress/styles/index.scss b/docs/.vuepress/styles/index.scss index 07667a5..b00d355 100644 --- a/docs/.vuepress/styles/index.scss +++ b/docs/.vuepress/styles/index.scss @@ -26,7 +26,7 @@ --c-border-dark: #dfe2e5; // custom container colors - --c-tip: #42b983; + --c-tip: #4a8ee5; --c-tip-bg: var(--c-bg-light); --c-tip-title: var(--c-text); --c-tip-text: var(--c-text); diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 0000000..097ecc4 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,6 @@ +Dev mode for the docs +```shell +pnpm docs:dev +``` + +If you want to edit the sidebars: See `.vuepress/config.ts`. \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 54d74f6..b574fbc 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,3 +1,3 @@ ![](./evonote.svg) -Welcome to the Evonote documentation! \ No newline at end of file +Welcome to the EvoNote documentation! \ No newline at end of file diff --git a/docs/writings/1. Indexing is non-trivial.md b/docs/writings/1. Indexing is non-trivial.md index 4c73023..c795e30 100644 --- a/docs/writings/1. Indexing is non-trivial.md +++ b/docs/writings/1. Indexing is non-trivial.md @@ -4,11 +4,11 @@ What does it mean by understanding? Usually, it is use as the opposite of just memorizing. When you memorize something, you just remember it. 
But when you understand something, there are a few more magic happening: -> You know which part of the knowledge is relevant to the context. +- You know which part of the knowledge is relevant to the context. This is quite difficult because the context usually doesn't match the knowledge exactly. For example, if the doctor says: "don't drink any water". You may think that you can drink juice, but you can't drink juice either. You can't drink anything. -> You know how different parts of the knowledge are related. +- You know how different parts of the knowledge are related. This is even more difficult. The reason is two-fold: @@ -55,8 +55,8 @@ With embedding-based search in hand, it seems what left for us to build is simpl # Related works -History of retrieval: https://dl.acm.org/doi/pdf/10.1145/3486250 +[History of retrieval](https://dl.acm.org/doi/pdf/10.1145/3486250) -LlamaIndex: https://www.llamaindex.ai/ +[LlamaIndex](https://www.llamaindex.ai/) -ACL 2023 Tutorial on Retrieval-based Language Models: https://acl2023-retrieval-lm.github.io/ \ No newline at end of file +[ACL 2023 Tutorial on Retrieval-based Language Models](https://acl2023-retrieval-lm.github.io/) \ No newline at end of file diff --git a/docs/writings/2.1 Method of Loci and sparsity.md b/docs/writings/2.1 Method of Loci and sparsity.md index 6ae9b3a..1b85b48 100644 --- a/docs/writings/2.1 Method of Loci and sparsity.md +++ b/docs/writings/2.1 Method of Loci and sparsity.md @@ -7,14 +7,18 @@ When you try to memorize a list of things, you can just imagine a place you are ## Why is the method good? Why this method is efficient? Here is the claim: -> Method of Loci is efficient because it creates a graph of knowledge with each node has only limited number of edges. That is, it is a sparse graph. - +::: tip Claim +The Method of Loci is efficient because it creates a graph of knowledge in which each node has only a limited number of edges. That is, it is a sparse graph. 
+::: Here, in the graph of knowledge, the nodes are the context (situation) and the edges leads to the memory or another situation. The whole point of method of loci is to turn a list of things, which is densely indexed, into a sparsely connected structure. ## Why sparse graph is good? Here is the claim: -> Sparse graph performs better because it fits in context window of human brain better. + +::: tip Claim +A sparse graph performs better because it fits better in the context window of the human brain. +::: Thinking with a sparse graph limits the number of things you need to think about at one time. In this meantime, because the knowledge are still interconnected, you can still think about the whole knowledge. diff --git a/docs/writings/3. Copernicus and indexing.md b/docs/writings/3. Copernicus and indexing.md index 7314f96..a8bc68c 100644 --- a/docs/writings/3. Copernicus and indexing.md +++ b/docs/writings/3. Copernicus and indexing.md @@ -2,39 +2,50 @@ In the previous article, we found that indexing is deeply related to understanding. However, can we somehow give a definition of understanding? We mentioned in the first article that memorization is different from understanding. So the explicit knowledge must not be the understanding and understanding must be something other than the knowledge itself. Naturally, because the "other" thing must be related to the stand-alone knowledge, we can call it the **implicit context** of the knowledge. With this definition, I claim that -> To understand a thing, you must know that implicit context of it. +::: tip Claim +To understand a thing, you must know the implicit context of it. +::: and consequently, -> The way to understand a thing, is the way to assign the implicit context to it. +::: tip Claim +The way to understand a thing is the way to assign the implicit context to it. +::: Let's illustrate this with a few examples. 
-> Modern educated human think they understand earthquake and treat it as a result of the movement of tectonic plates. They think they understand because they can fit the phenomenon of earthquake into his existing knowledge of geology and use it as a context. +::: tip Example +Modern educated humans think they understand earthquakes and treat them as a result of the movement of tectonic plates. They think they understand because they can fit the phenomenon of earthquakes into their existing knowledge of geology and use it as a context. +::: + +::: tip Example +Ancient Japanese people think they understand earthquakes and treat them as a result of the movement of a giant catfish supporting the Japanese islands. They think they understand because they can fit the phenomenon of earthquakes into their existing knowledge of mythology and use it as a context. +::: -> Ancient Japanese people think they understand earthquake and treat it as a result of the movement of a giant catfish supporting the Japanese islands. They think they understand because they can fit the phenomenon of earthquake into his existing knowledge of mythology and use it as a context. - In this example, we show that different people have different understanding of the same thing. They assign different implicit context to one thing and both strongly believes so. Here, I want to emphasize that, we do not care about which context they assign is correct or not. We only care about the fact that they assign different context. Importantly, the context they assign might be both correct, but the context they assign is different. -> A person who believes geocentric model thinks he understands the movement planets because they can perfectly fit into his existing knowledge of astronomy and use it as a context, though in the context planets move in a very complicated way. 
+::: tip Example +A person who believes in the geocentric model thinks he understands the movement of the planets because he can fit it perfectly into his existing knowledge of astronomy and use that as a context, though in that context the planets move in a very complicated way. +::: -> A person who believes heliocentric model thinks he understands the movement planets because they can perfectly fit into his existing knowledge of astronomy and use it as a context. The context is different from the previous one and the planets move in a very simple way. +::: tip Example +A person who believes in the heliocentric model thinks he understands the movement of the planets because he can fit it perfectly into his existing knowledge of astronomy and use that as a context. The context is different from the previous one, and in it the planets move in a very simple way. +::: ## Tree indexing as an understanding As we introduced in the previous article, tree indexing can help assign a context to the knowledge. With a tree indexing, we can find existing knowledge that is similar to the incoming ones. With the help of the paths of the existing knowledge, a new path, namely a new context, can be created. This is the way tree indexing helps LLM to understand the knowledge. Specifically, the understanding can be carried out in the following way -> Step 1. Search similar knowledge in the knowledge base. -> -> Step 2. Gather the paths of the similar knowledge. -> -> Step 3. Synthesize new paths for the incoming knowledge. -> -> Step 4. Use the new paths as the context of the incoming knowledge for rephrasing them. -> -> Step 5. Put the rephrased knowledge into the knowledge base. +::: tip Procedure +- Search for similar knowledge in the knowledge base. +- Gather the paths of the similar knowledge. +- Synthesize new paths for the incoming knowledge. +- Use the new paths as the context of the incoming knowledge for rephrasing them. +- Put the rephrased knowledge into the knowledge base. 
+::: + ## Tree transformation as a transformation of understanding @@ -48,5 +59,6 @@ LLM can understand sentences. Where is the implicit context? My interpretation i # Related works -A Contextual Approach to Scientific Understanding: https://link.springer.com/article/10.1007/s11229-005-5000-4 +[A Contextual Approach to Scientific Understanding](https://link.springer.com/article/10.1007/s11229-005-5000-4) +[Memory is a modeling system](https://doi.org/10.1111/mila.12220) diff --git a/docs/writings/4. Continuous and discrete knowledge.md b/docs/writings/4. Continuous and discrete knowledge.md index a40a2c2..3c11bf9 100644 --- a/docs/writings/4. Continuous and discrete knowledge.md +++ b/docs/writings/4. Continuous and discrete knowledge.md @@ -7,7 +7,9 @@ Surely, there are many criteria to classify knowledge. The important thing is ho ### What is discrete knowledge? -> Discrete knowledge is the ones whose state is defined in a discrete space. Variation on it cannot be infinitesimal. +::: tip Definition +Discrete knowledge is the ones whose state is defined in a discrete space. Variation on it cannot be infinitesimal. +::: For example, a coin has two states: head and tail. The state of a coin is discrete knowledge. @@ -22,7 +24,9 @@ However, not all fields have concrete assumptions. In the long debate of rationa ### What is continuous knowledge? -> Continuous knowledge is the ones whose state is defined in a continuous space. It allows an infinitesimal variation. +::: tip Definition +Continuous knowledge is the ones whose state is defined in a continuous space. It allows an infinitesimal variation. +::: For example, the probability that a coin will be head is continuous knowledge. The probability is a real number between 0 and 1. @@ -50,7 +54,7 @@ EvoNote is trying to add more discrete structure to the continuous knowledge. Here, we first claim that the knowledge need to be interpreted by large language models are continuous. 
Though they might look like discrete because they are symbols, but they are meaningless symbols without an interpreter. ->Admittedly, you can parse a sentence into a syntax tree. But syntax tree can never represent the accurate meaning. For example, I can set a question: +> Admittedly, you can parse a sentence into a syntax tree. But syntax tree can never represent the accurate meaning. For example, I can set a question: > "If apple means eat in the next sentence. 'Mike apple an apple.' What did Mike intake?" >This question is easy for human to answer but will break any natural language parser. diff --git a/evonote/model/chat.py b/evonote/model/chat.py index 9fcdfa6..15ec8db 100644 --- a/evonote/model/chat.py +++ b/evonote/model/chat.py @@ -31,25 +31,22 @@ def __init__(self, user_message=None, system_message: any = None): if user_message is not None: self._add_message(user_message, "user") + """ + ## Message editing and output + """ + def _add_message(self, content: any, role: str): self.history.append({ "content": content, "role": role }) - def ask(self, content: any): - self.add_user_message(content) - def add_user_message(self, content: any): self._add_message(content, "user") def add_assistant_message(self, content: any): self._add_message(content, "assistant") - def __copy__(self): - new_chat_log = Chat(system_message=self.system_message) - new_chat_log.history = copy.deepcopy(self.history) - return new_chat_log def get_log_list(self): """ @@ -68,6 +65,10 @@ def get_log_list(self): }) return res + """ + ## Chat completion functions + """ + def complete_chat(self, options=None): options = options or {} if use_openai_model(options): @@ -92,6 +93,10 @@ def complete_chat_expensive(self, options=None): chat_logger.add_log(self) return res + """ + ## Magic methods + """ + def __str__(self): res = [] log_list = self.get_log_list() @@ -102,6 +107,11 @@ def __str__(self): def __repr__(self): return f"<{self.__class__.__name__}> {self.system_message!r}" + def 
__copy__(self): + new_chat_log = Chat(system_message=self.system_message) + new_chat_log.history = copy.deepcopy(self.history) + return new_chat_log + def use_openai_model(options) -> bool: return options.get("model", "gpt-3.5-turbo") in openai_model_list