diff --git a/.github/workflows/run_test_and_gen_report.yml b/.github/workflows/run_test_and_gen_report.yml
index caa9eaf7..97e7eb6b 100644
--- a/.github/workflows/run_test_and_gen_report.yml
+++ b/.github/workflows/run_test_and_gen_report.yml
@@ -6,6 +6,10 @@ on:
- 'v*.*.*'
workflow_dispatch:
+# Grant write access so the workflow can push the generated report back to the repo
+permissions:
+ contents: write
+
jobs:
test:
runs-on: ubuntu-latest
@@ -76,10 +80,31 @@ jobs:
cd tests
poetry run python run_test.py
- - name: Commit test results
- uses: stefanzweifel/git-auto-commit-action@v4
- with:
- commit_message: Add test results
+ - name: Commit and push report
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: |
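+ # The contents: write permission declared at the top of the workflow is required for the push to main below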
+ # Switch to main branch
+ git fetch origin main
+ git checkout main
+
+ git config --global user.name 'github-actions[bot]'
+ git config --global user.email 'github-actions[bot]@users.noreply.github.com'
+
+ # Verify the generated report exists (warn only; the step still succeeds if it is missing)
+ ls -la tests/compatible-models.mdx || echo "Report file not found!"
+
+ # Add all changes (including new files)
+ git add -A
+
+ # Show pending changes
+ git status
+
+ # Commit the report; exit successfully even when there is nothing to commit
+ git commit -m "docs: update compatibility test report" || echo "No changes to commit"
+
+ # Push changes to main branch
+ git push origin main
diff --git a/openagent/conf/llm_provider.py b/openagent/conf/llm_provider.py
index b660b3dd..81695e7a 100644
--- a/openagent/conf/llm_provider.py
+++ b/openagent/conf/llm_provider.py
@@ -11,7 +11,7 @@
from openagent.conf.env import settings
-SUPPORTED_MODELS = {
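+# Ollama-only registry: keys are Ollama model tags; supports_tools marks models usable in the tool-calling workflow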
+SUPPORTED_OLLAMA_MODELS = {
"llama3.2": {"name": "llama3.2", "supports_tools": True},
"mistral-nemo": {"name": "mistral-nemo", "supports_tools": True},
"darkmoon/olmo:7B-instruct-q6-k": {"name": "olmo", "supports_tools": False},
@@ -35,12 +35,12 @@ def get_available_ollama_providers() -> List[str]:
for model in ollama_list["models"]:
full_name = model["name"]
- # check if the full model name is in SUPPORTED_MODELS
- if full_name in SUPPORTED_MODELS:
+ # check if the full model name is in SUPPORTED_OLLAMA_MODELS
+ if full_name in SUPPORTED_OLLAMA_MODELS:
available_models.append(full_name)
else:
# try to check the base name (without version tag)
base_name = full_name.split(":")[0]
- if base_name in SUPPORTED_MODELS:
+ if base_name in SUPPORTED_OLLAMA_MODELS:
available_models.append(base_name)
return available_models
except Exception as e:
diff --git a/openagent/ui/app.py b/openagent/ui/app.py
index 418a8713..705ade4a 100644
--- a/openagent/ui/app.py
+++ b/openagent/ui/app.py
@@ -8,10 +8,11 @@
from langchain.schema.runnable.config import RunnableConfig
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import HumanMessage
+from langchain_ollama import ChatOllama
from loguru import logger
from openagent.conf.env import settings
-from openagent.conf.llm_provider import SUPPORTED_MODELS, get_available_providers
+from openagent.conf.llm_provider import SUPPORTED_OLLAMA_MODELS, get_available_providers
from openagent.ui.profile import profile_name_to_provider_key, provider_to_profile
from openagent.workflows.member import members
from openagent.workflows.workflow import build_workflow
@@ -108,9 +109,9 @@ async def on_message(message: cl.Message): # noqa
msg = cl.Message(content="")
agent_names = [member["name"] for member in members]
- if hasattr(llm, "model"):
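+ # Consult the Ollama support table only for ChatOllama; other providers are assumed to support tools (see else branch)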
+ if hasattr(llm, "model") and isinstance(llm, ChatOllama):
model_name = llm.model
- supports_tools = SUPPORTED_MODELS.get(model_name, {}).get("supports_tools", False)
+ supports_tools = SUPPORTED_OLLAMA_MODELS.get(model_name, {}).get("supports_tools", False)
else:
supports_tools = True
diff --git a/openagent/ui/profile.py b/openagent/ui/profile.py
index 6e46d51b..98da5c19 100644
--- a/openagent/ui/profile.py
+++ b/openagent/ui/profile.py
@@ -1,6 +1,6 @@
import chainlit as cl
-from openagent.conf.llm_provider import MODELS_ICONS, SUPPORTED_MODELS
+from openagent.conf.llm_provider import MODELS_ICONS, SUPPORTED_OLLAMA_MODELS
provider_key_to_profile_info = {
"gpt-4o": {
@@ -25,7 +25,7 @@
},
}
-for model_key, model_info in SUPPORTED_MODELS.items():
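+# Register a chat profile for every supported Ollama model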
+for model_key, model_info in SUPPORTED_OLLAMA_MODELS.items():
icon = MODELS_ICONS.get(model_info["name"], "/public/ollama.png") # type: ignore
provider_key_to_profile_info[model_key] = {
"name": model_info["name"], # type: ignore
diff --git a/openagent/workflows/workflow.py b/openagent/workflows/workflow.py
index be50f731..20eba928 100644
--- a/openagent/workflows/workflow.py
+++ b/openagent/workflows/workflow.py
@@ -3,6 +3,7 @@
from langchain_core.language_models import BaseChatModel
from langchain_core.messages import BaseMessage, HumanMessage
+from langchain_ollama import ChatOllama
from langgraph.graph import END, StateGraph
from loguru import logger
@@ -11,7 +12,7 @@
from openagent.agents.fallback import build_fallback_agent
from openagent.agents.feed_explore import build_feed_explorer_agent
from openagent.agents.research_analyst import build_research_analyst_agent
-from openagent.conf.llm_provider import SUPPORTED_MODELS
+from openagent.conf.llm_provider import SUPPORTED_OLLAMA_MODELS
class AgentState(TypedDict):
@@ -29,14 +30,15 @@ async def run(state):
def build_workflow(llm: BaseChatModel):
- if hasattr(llm, "model"):
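+ # Only Ollama models need the tool-support lookup; every other provider goes straight to the tool workflow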
+ is_ollama = isinstance(llm, ChatOllama)
+ if hasattr(llm, "model") and is_ollama:
model_name = llm.model
else:
return build_tool_workflow(llm)
- supports_tools = SUPPORTED_MODELS.get(model_name, {}).get("supports_tools", False)
+ supports_tools = SUPPORTED_OLLAMA_MODELS.get(model_name, {}).get("supports_tools", False)
- if not supports_tools:
+ if not supports_tools and is_ollama:
return build_simple_workflow(llm)
else:
return build_tool_workflow(llm)
diff --git a/poetry.lock b/poetry.lock
index f481eccb..087ea502 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -972,13 +972,13 @@ files = [
[[package]]
name = "google-ai-generativelanguage"
-version = "0.6.4"
+version = "0.6.6"
description = "Google Ai Generativelanguage API client library"
optional = false
python-versions = ">=3.7"
files = [
- {file = "google-ai-generativelanguage-0.6.4.tar.gz", hash = "sha256:1750848c12af96cb24ae1c3dd05e4bfe24867dc4577009ed03e1042d8421e874"},
- {file = "google_ai_generativelanguage-0.6.4-py3-none-any.whl", hash = "sha256:730e471aa549797118fb1c88421ba1957741433ada575cf5dd08d3aebf903ab1"},
+ {file = "google-ai-generativelanguage-0.6.6.tar.gz", hash = "sha256:1739f035caeeeca5c28f887405eec8690f3372daf79fecf26454a97a4f1733a8"},
+ {file = "google_ai_generativelanguage-0.6.6-py3-none-any.whl", hash = "sha256:59297737931f073d55ce1268dcc6d95111ee62850349d2b6cde942b16a4fca5c"},
]
[package.dependencies]
@@ -1014,13 +1014,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
[[package]]
name = "google-api-python-client"
-version = "2.129.0"
+version = "2.151.0"
description = "Google API Client Library for Python"
optional = false
python-versions = ">=3.7"
files = [
- {file = "google-api-python-client-2.129.0.tar.gz", hash = "sha256:984cc8cc8eb4923468b1926d2b8effc5b459a4dda3c845896eb87c153b28ef84"},
- {file = "google_api_python_client-2.129.0-py2.py3-none-any.whl", hash = "sha256:d50f7e2dfdbb7fc2732f6a0cba1c54d7bb676390679526c6bb628c901e43ec86"},
+ {file = "google_api_python_client-2.151.0-py2.py3-none-any.whl", hash = "sha256:4427b2f47cd88b0355d540c2c52215f68c337f3bc9d6aae1ceeae4525977504c"},
+ {file = "google_api_python_client-2.151.0.tar.gz", hash = "sha256:a9d26d630810ed4631aea21d1de3e42072f98240aaf184a8a1a874a371115034"},
]
[package.dependencies]
@@ -1286,16 +1286,16 @@ testing = ["pytest"]
[[package]]
name = "google-generativeai"
-version = "0.5.4"
+version = "0.7.2"
description = "Google Generative AI High level API client library and tools."
optional = false
python-versions = ">=3.9"
files = [
- {file = "google_generativeai-0.5.4-py3-none-any.whl", hash = "sha256:036d63ee35e7c8aedceda4f81c390a5102808af09ff3a6e57e27ed0be0708f3c"},
+ {file = "google_generativeai-0.7.2-py3-none-any.whl", hash = "sha256:3117d1ebc92ee77710d4bc25ab4763492fddce9b6332eb25d124cf5d8b78b339"},
]
[package.dependencies]
-google-ai-generativelanguage = "0.6.4"
+google-ai-generativelanguage = "0.6.6"
google-api-core = "*"
google-api-python-client = "*"
google-auth = ">=2.15.0"
@@ -1754,13 +1754,13 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0"
[[package]]
name = "langchain-core"
-version = "0.2.24"
+version = "0.2.38"
description = "Building applications with LLMs through composability"
optional = false
python-versions = "<4.0,>=3.8.1"
files = [
- {file = "langchain_core-0.2.24-py3-none-any.whl", hash = "sha256:9444fc082d21ef075d925590a684a73fe1f9688a3d90087580ec929751be55e7"},
- {file = "langchain_core-0.2.24.tar.gz", hash = "sha256:f2e3fa200b124e8c45d270da9bf836bed9c09532612c96ff3225e59b9a232f5a"},
+ {file = "langchain_core-0.2.38-py3-none-any.whl", hash = "sha256:8a5729bc7e68b4af089af20eff44fe4e7ca21d0e0c87ec21cef7621981fd1a4a"},
+ {file = "langchain_core-0.2.38.tar.gz", hash = "sha256:eb69dbedd344f2ee1f15bcea6c71a05884b867588fadc42d04632e727c1238f3"},
]
[package.dependencies]
@@ -1773,21 +1773,22 @@ pydantic = [
]
PyYAML = ">=5.3"
tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0"
+typing-extensions = ">=4.7"
[[package]]
name = "langchain-google-genai"
-version = "1.0.4"
+version = "1.0.10"
description = "An integration package connecting Google's genai package and LangChain"
optional = false
python-versions = "<4.0,>=3.9"
files = [
- {file = "langchain_google_genai-1.0.4-py3-none-any.whl", hash = "sha256:e567cc401f8d629fce489ee031d258da7fa4b7da0abb8ed926d6990c650b659e"},
- {file = "langchain_google_genai-1.0.4.tar.gz", hash = "sha256:b6beccfe7504ce9f8778a8df23dc49239fd91cf076a55d61759a09fc1373ca26"},
+ {file = "langchain_google_genai-1.0.10-py3-none-any.whl", hash = "sha256:333f5e10ebde45b519b7816d7129cb73c5f5e6ab0df9960fa2c9f339fe9d9068"},
+ {file = "langchain_google_genai-1.0.10.tar.gz", hash = "sha256:d4465aaf50825c78663618259ceca60a323d33b1a09a791631ddc7bd4806f4ce"},
]
[package.dependencies]
-google-generativeai = ">=0.5.2,<0.6.0"
-langchain-core = ">=0.1.45,<0.3"
+google-generativeai = ">=0.7.0,<0.8.0"
+langchain-core = ">=0.2.33,<0.3"
[package.extras]
images = ["pillow (>=10.1.0,<11.0.0)"]
@@ -3134,13 +3135,13 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"]
[[package]]
name = "pyparsing"
-version = "3.1.2"
+version = "3.2.0"
description = "pyparsing module - Classes and methods to define and execute parsing grammars"
optional = false
-python-versions = ">=3.6.8"
+python-versions = ">=3.9"
files = [
- {file = "pyparsing-3.1.2-py3-none-any.whl", hash = "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"},
- {file = "pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"},
+ {file = "pyparsing-3.2.0-py3-none-any.whl", hash = "sha256:93d9577b88da0bbea8cc8334ee8b918ed014968fd2ec383e868fb8afb1ccef84"},
+ {file = "pyparsing-3.2.0.tar.gz", hash = "sha256:cbf74e27246d595d9a74b186b810f6fbb86726dbf3b9532efb343f6d7294fe9c"},
]
[package.extras]
@@ -3148,20 +3149,20 @@ diagrams = ["jinja2", "railroad-diagrams"]
[[package]]
name = "pytest"
-version = "8.2.2"
+version = "8.3.3"
description = "pytest: simple powerful testing with Python"
optional = false
python-versions = ">=3.8"
files = [
- {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"},
- {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"},
+ {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"},
+ {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"},
]
[package.dependencies]
colorama = {version = "*", markers = "sys_platform == \"win32\""}
iniconfig = "*"
packaging = "*"
-pluggy = ">=1.5,<2.0"
+pluggy = ">=1.5,<2"
[package.extras]
dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
@@ -4412,4 +4413,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools",
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
-content-hash = "7fc0b8b9178cce25f4d340bd469bab0a89a15bb4311a4de44850925e5de0e7e4"
+content-hash = "12b02c526f80608f14fd5ca9de26986302203a66fc23a07f38c40b5c454a18ce"
diff --git a/pyproject.toml b/pyproject.toml
index 2ff03bc0..b035dbe5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,6 @@ sqlalchemy-utils = "^0.41.2"
retrying = "^1.3.4"
langchain-postgres = "0.0.9"
psycopg2-binary = "^2.9.9"
-langchain-google-genai = "^1.0.4"
psycopg-binary = "^3.1.19"
langchain-core = ">=0.2.9,<0.3"
langgraph="0.1.1"
@@ -50,6 +49,8 @@ moralis = "^0.1.49"
pytest-asyncio = "^0.23.8"
feedparser = "^6.0.11"
jinja2 = "^3.1.4"
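+# loosened from ^1.0.4; the lock resolves 1.0.10, which keeps langchain-core below 0.3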
+langchain-google-genai = "<2.0.4"
+pytest = "^8.3.3"
[tool.poetry.group.dev.dependencies]
ruff = "^0.4.1"
diff --git a/tests/compatible-models.mdx b/tests/compatible-models.mdx
index 8a9789c2..206d0661 100644
--- a/tests/compatible-models.mdx
+++ b/tests/compatible-models.mdx
@@ -30,13 +30,17 @@ Learn more:
-| Name | Score (out of 100) |
-|------|-------------------|
-
+| Name | Score (out of 100) | Function Call Support |
+|------|-------------------|---------------------|
+| mistral-nemo | 88 | ✅ |
+| llama3.2 | 80 | ✅ |
+| darkmoon/olmo:7B-instruct-q6-k | 0 | ❌ |
### Proprietary Models
-| Name | Score (out of 100) |
-|------|-------------------|
-| gpt-4o-mini | 100 |
-| gpt-4o | 100 |
\ No newline at end of file
+| Name | Score (out of 100) | Function Call Support |
+|------|-------------------|---------------------|
+| gpt-4o-mini | 100 | ✅ |
+| gemini-1.5-flash | 100 | ✅ |
+| gpt-4o | 96 | ✅ |
+| gemini-1.5-pro | 96 | ✅ |
\ No newline at end of file
diff --git a/tests/run_test.py b/tests/run_test.py
index 3d6db347..ae4788d8 100644
--- a/tests/run_test.py
+++ b/tests/run_test.py
@@ -4,6 +4,20 @@
import pytest
from jinja2 import Environment, FileSystemLoader
+# Global model configurations
+PROPRIETARY_MODELS = [
+ {"name": "gpt-4o-mini", "function_call_support": True},
+ {"name": "gpt-4o", "function_call_support": True},
+ {"name": "gemini-1.5-flash", "function_call_support": True},
+ {"name": "gemini-1.5-pro", "function_call_support": True},
+]
+
+OPENSOURCE_MODELS = [
+ {"name": "llama3.2", "function_call_support": True},
+ {"name": "mistral-nemo", "function_call_support": True},
+ # {"name": "darkmoon/olmo:7B-instruct-q6-k", "function_call_support": False}
+]
+
class TestStats:
def __init__(self):
@@ -38,16 +52,29 @@ def pytest_terminal_summary(self, terminalreporter, exitstatus, config):
def generate_model_report(proprietary_results, opensource_results):
- # Convert results format
- proprietary_models = [
- {'name': model_name, 'score': score}
- for model_name, score in proprietary_results.items()
- ]
-
- open_source_models = [
- {'name': model_name, 'score': score}
- for model_name, score in opensource_results.items()
- ]
+ def bool_to_emoji(value):
+ return "✅" if value else "❌"
+
+ # Convert results format and sort by score
+ proprietary_models = []
+ for model in PROPRIETARY_MODELS:
+ if model['name'] in proprietary_results:
+ proprietary_models.append({
+ 'name': model['name'],
+ 'score': proprietary_results[model['name']],
+ 'function_call_support': bool_to_emoji(model['function_call_support'])
+ })
+ proprietary_models.sort(key=lambda x: x['score'], reverse=True)
+
+ open_source_models = []
+ for model in OPENSOURCE_MODELS:
+ if model['name'] in opensource_results:
+ open_source_models.append({
+ 'name': model['name'],
+ 'score': opensource_results[model['name']],
+ 'function_call_support': bool_to_emoji(model['function_call_support'])
+ })
+ open_source_models.sort(key=lambda x: x['score'], reverse=True)
# Set up template environment
env = Environment(loader=FileSystemLoader('templates'))
@@ -73,32 +100,21 @@ def run_model_tests(model_name):
return stats.calculate_model_score()
-def run_all_tests(proprietary_models, opensource_models):
+def run_all_tests():
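+ """Run the test suite against every configured model and regenerate the report."""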
proprietary_results = {}
opensource_results = {}
# Test proprietary models
- for model in proprietary_models:
- proprietary_results[model] = run_model_tests(model)
+ for model in PROPRIETARY_MODELS:
+ proprietary_results[model['name']] = run_model_tests(model['name'])
# Test open source models
- for model in opensource_models:
- opensource_results[model] = run_model_tests(model)
+ for model in OPENSOURCE_MODELS:
+ opensource_results[model['name']] = run_model_tests(model['name'])
# Generate report
generate_model_report(proprietary_results, opensource_results)
if __name__ == "__main__":
- # Proprietary model list
- proprietary_models = [
- "gpt-4o-mini",
- "gpt-4o",
- ]
-
- # Open source model list
- opensource_models = [
- # "llama3.2",
- ]
-
- run_all_tests(proprietary_models, opensource_models)
+ run_all_tests()
diff --git a/tests/templates/compatible-models.mdx.j2 b/tests/templates/compatible-models.mdx.j2
index 76abc58a..5bc8f70f 100644
--- a/tests/templates/compatible-models.mdx.j2
+++ b/tests/templates/compatible-models.mdx.j2
@@ -30,16 +30,16 @@ Learn more:
-| Name | Score (out of 100) |
-|------|-------------------|
-{% for model in open_source_models -%}
-| {{ model.name }} | {{ model.score }} |
+| Name | Score (out of 100) | Function Call Support |
+|------|-------------------|---------------------|
+{%- for model in open_source_models %}
+| {{ model.name }} | {{ model.score }} | {{ model.function_call_support }} |
{%- endfor %}
### Proprietary Models
-| Name | Score (out of 100) |
-|------|-------------------|
+| Name | Score (out of 100) | Function Call Support |
+|------|-------------------|---------------------|
{%- for model in proprietary_models %}
-| {{ model.name }} | {{ model.score }} |
+| {{ model.name }} | {{ model.score }} | {{ model.function_call_support }} |
{%- endfor %}
\ No newline at end of file