diff --git a/.github/workflows/run_test_and_gen_report.yml b/.github/workflows/run_test_and_gen_report.yml
index caa9eaf7..97e7eb6b 100644
--- a/.github/workflows/run_test_and_gen_report.yml
+++ b/.github/workflows/run_test_and_gen_report.yml
@@ -6,6 +6,10 @@ on:
       - 'v*.*.*'
   workflow_dispatch:
 
+# Add permissions configuration
+permissions:
+  contents: write
+
 jobs:
   test:
     runs-on: ubuntu-latest
@@ -76,10 +80,31 @@ jobs:
           cd tests
           poetry run python run_test.py
 
-      - name: Commit test results
-        uses: stefanzweifel/git-auto-commit-action@v4
-        with:
-          commit_message: Add test results
+      - name: Commit and push report
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Switch to main branch
+          git fetch origin main
+          git checkout main
+
+          git config --global user.name 'github-actions[bot]'
+          git config --global user.email 'github-actions[bot]@users.noreply.github.com'
+
+          # Check that the report file exists
+          ls -la tests/compatible-models.mdx || echo "Report file not found!"
+
+          # Add all changes (including new files)
+          git add -A
+
+          # Show pending changes
+          git status
+
+          # Create commit; skip gracefully when there is nothing to commit
+          git commit -m "docs: update compatibility test report" || echo "No changes to commit"
+
+          # Push changes to main branch
+          git push origin main
diff --git a/openagent/conf/llm_provider.py b/openagent/conf/llm_provider.py
index b660b3dd..81695e7a 100644
--- a/openagent/conf/llm_provider.py
+++ b/openagent/conf/llm_provider.py
@@ -11,7 +11,7 @@ from openagent.conf.env import settings
 
-SUPPORTED_MODELS = {
+SUPPORTED_OLLAMA_MODELS = {
     "llama3.2": {"name": "llama3.2", "supports_tools": True},
     "mistral-nemo": {"name": "mistral-nemo", "supports_tools": True},
     "darkmoon/olmo:7B-instruct-q6-k": {"name": "olmo", "supports_tools": False},
@@ -35,12 +35,12 @@ def get_available_ollama_providers() -> List[str]:
         for model in ollama_list["models"]:
             full_name = model["name"]
             # check if the full model name is in SUPPORTED_MODELS
-            if full_name in SUPPORTED_MODELS:
+            if full_name in SUPPORTED_OLLAMA_MODELS:
                 available_models.append(full_name)
             else:
                 # try to check the base name (without version tag)
                 base_name = full_name.split(":")[0]
-                if base_name in SUPPORTED_MODELS:
+                if base_name in SUPPORTED_OLLAMA_MODELS:
                     available_models.append(base_name)
         return available_models
     except Exception as e:
diff --git a/openagent/ui/app.py b/openagent/ui/app.py
index 418a8713..705ade4a 100644
--- a/openagent/ui/app.py
+++ b/openagent/ui/app.py
@@ -8,10 +8,11 @@
 from langchain.schema.runnable.config import RunnableConfig
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages import HumanMessage
+from langchain_ollama import ChatOllama
 from loguru import logger
 
 from openagent.conf.env import settings
-from openagent.conf.llm_provider import SUPPORTED_MODELS, get_available_providers
+from openagent.conf.llm_provider import SUPPORTED_OLLAMA_MODELS, get_available_providers
 from openagent.ui.profile import profile_name_to_provider_key, provider_to_profile
 from openagent.workflows.member import members
 from openagent.workflows.workflow import build_workflow
@@ -108,9 +109,9 @@ async def on_message(message: cl.Message):  # noqa
     msg = cl.Message(content="")
 
     agent_names = [member["name"] for member in members]
-    if hasattr(llm, "model"):
+    if hasattr(llm, "model") and isinstance(llm, ChatOllama):
         model_name = llm.model
-        supports_tools = SUPPORTED_MODELS.get(model_name, {}).get("supports_tools", False)
+        supports_tools = SUPPORTED_OLLAMA_MODELS.get(model_name, {}).get("supports_tools", False)
     else:
         supports_tools = True
diff --git a/openagent/ui/profile.py b/openagent/ui/profile.py
index 6e46d51b..98da5c19 100644
--- a/openagent/ui/profile.py
+++ b/openagent/ui/profile.py
@@ -1,6 +1,6 @@
 import chainlit as cl
 
-from openagent.conf.llm_provider import MODELS_ICONS, SUPPORTED_MODELS
+from openagent.conf.llm_provider import MODELS_ICONS, SUPPORTED_OLLAMA_MODELS
 
 provider_key_to_profile_info = {
     "gpt-4o": {
@@ -25,7 +25,7 @@
     },
 }
 
-for model_key, model_info in SUPPORTED_MODELS.items():
+for model_key, model_info in SUPPORTED_OLLAMA_MODELS.items():
     icon = MODELS_ICONS.get(model_info["name"], "/public/ollama.png")  # type: ignore
     provider_key_to_profile_info[model_key] = {
         "name": model_info["name"],  # type: ignore
diff --git a/openagent/workflows/workflow.py b/openagent/workflows/workflow.py
index be50f731..20eba928 100644
--- a/openagent/workflows/workflow.py
+++ b/openagent/workflows/workflow.py
@@ -3,6 +3,7 @@
 from langchain_core.language_models import BaseChatModel
 from langchain_core.messages import BaseMessage, HumanMessage
+from langchain_ollama import ChatOllama
 from langgraph.graph import END, StateGraph
 from loguru import logger
 
@@ -11,7 +12,7 @@
 from openagent.agents.fallback import build_fallback_agent
 from openagent.agents.feed_explore import build_feed_explorer_agent
 from openagent.agents.research_analyst import build_research_analyst_agent
-from openagent.conf.llm_provider import SUPPORTED_MODELS
+from openagent.conf.llm_provider import SUPPORTED_OLLAMA_MODELS
 
 
 class AgentState(TypedDict):
@@ -29,14 +30,15 @@ async def run(state):
 
 
 def build_workflow(llm: BaseChatModel):
-    if hasattr(llm, "model"):
+    is_ollama = isinstance(llm, ChatOllama)
+    if hasattr(llm, "model") and is_ollama:
         model_name = llm.model
     else:
         return build_tool_workflow(llm)
 
-    supports_tools = SUPPORTED_MODELS.get(model_name, {}).get("supports_tools", False)
+    supports_tools = SUPPORTED_OLLAMA_MODELS.get(model_name, {}).get("supports_tools", False)
 
-    if not supports_tools:
+    if not supports_tools and is_ollama:
         return build_simple_workflow(llm)
     else:
         return build_tool_workflow(llm)
diff --git a/poetry.lock b/poetry.lock
index f481eccb..087ea502 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -972,13 +972,13 @@ files = [
 
 [[package]]
 name = "google-ai-generativelanguage"
-version = "0.6.4"
+version = "0.6.6"
 description = "Google Ai Generativelanguage API client library"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "google-ai-generativelanguage-0.6.4.tar.gz", hash = "sha256:1750848c12af96cb24ae1c3dd05e4bfe24867dc4577009ed03e1042d8421e874"},
-    {file = "google_ai_generativelanguage-0.6.4-py3-none-any.whl", hash = "sha256:730e471aa549797118fb1c88421ba1957741433ada575cf5dd08d3aebf903ab1"},
+    {file = "google-ai-generativelanguage-0.6.6.tar.gz", hash = "sha256:1739f035caeeeca5c28f887405eec8690f3372daf79fecf26454a97a4f1733a8"},
+    {file = "google_ai_generativelanguage-0.6.6-py3-none-any.whl", hash = "sha256:59297737931f073d55ce1268dcc6d95111ee62850349d2b6cde942b16a4fca5c"},
 ]
 
 [package.dependencies]
@@ -1014,13 +1014,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"]
 
 [[package]]
 name = "google-api-python-client"
-version = "2.129.0"
+version = "2.151.0"
 description = "Google API Client Library for Python"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "google-api-python-client-2.129.0.tar.gz", hash = "sha256:984cc8cc8eb4923468b1926d2b8effc5b459a4dda3c845896eb87c153b28ef84"},
-    {file = "google_api_python_client-2.129.0-py2.py3-none-any.whl", hash = "sha256:d50f7e2dfdbb7fc2732f6a0cba1c54d7bb676390679526c6bb628c901e43ec86"},
"sha256:d50f7e2dfdbb7fc2732f6a0cba1c54d7bb676390679526c6bb628c901e43ec86"}, + {file = "google_api_python_client-2.151.0-py2.py3-none-any.whl", hash = "sha256:4427b2f47cd88b0355d540c2c52215f68c337f3bc9d6aae1ceeae4525977504c"}, + {file = "google_api_python_client-2.151.0.tar.gz", hash = "sha256:a9d26d630810ed4631aea21d1de3e42072f98240aaf184a8a1a874a371115034"}, ] [package.dependencies] @@ -1286,16 +1286,16 @@ testing = ["pytest"] [[package]] name = "google-generativeai" -version = "0.5.4" +version = "0.7.2" description = "Google Generative AI High level API client library and tools." optional = false python-versions = ">=3.9" files = [ - {file = "google_generativeai-0.5.4-py3-none-any.whl", hash = "sha256:036d63ee35e7c8aedceda4f81c390a5102808af09ff3a6e57e27ed0be0708f3c"}, + {file = "google_generativeai-0.7.2-py3-none-any.whl", hash = "sha256:3117d1ebc92ee77710d4bc25ab4763492fddce9b6332eb25d124cf5d8b78b339"}, ] [package.dependencies] -google-ai-generativelanguage = "0.6.4" +google-ai-generativelanguage = "0.6.6" google-api-core = "*" google-api-python-client = "*" google-auth = ">=2.15.0" @@ -1754,13 +1754,13 @@ tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" [[package]] name = "langchain-core" -version = "0.2.24" +version = "0.2.38" description = "Building applications with LLMs through composability" optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langchain_core-0.2.24-py3-none-any.whl", hash = "sha256:9444fc082d21ef075d925590a684a73fe1f9688a3d90087580ec929751be55e7"}, - {file = "langchain_core-0.2.24.tar.gz", hash = "sha256:f2e3fa200b124e8c45d270da9bf836bed9c09532612c96ff3225e59b9a232f5a"}, + {file = "langchain_core-0.2.38-py3-none-any.whl", hash = "sha256:8a5729bc7e68b4af089af20eff44fe4e7ca21d0e0c87ec21cef7621981fd1a4a"}, + {file = "langchain_core-0.2.38.tar.gz", hash = "sha256:eb69dbedd344f2ee1f15bcea6c71a05884b867588fadc42d04632e727c1238f3"}, ] [package.dependencies] @@ -1773,21 +1773,22 @@ pydantic = [ ] PyYAML = ">=5.3" tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0" +typing-extensions = ">=4.7" [[package]] name = "langchain-google-genai" -version = "1.0.4" +version = "1.0.10" description = "An integration package connecting Google's genai package and LangChain" optional = false python-versions = "<4.0,>=3.9" files = [ - {file = "langchain_google_genai-1.0.4-py3-none-any.whl", hash = "sha256:e567cc401f8d629fce489ee031d258da7fa4b7da0abb8ed926d6990c650b659e"}, - {file = "langchain_google_genai-1.0.4.tar.gz", hash = "sha256:b6beccfe7504ce9f8778a8df23dc49239fd91cf076a55d61759a09fc1373ca26"}, + {file = "langchain_google_genai-1.0.10-py3-none-any.whl", hash = "sha256:333f5e10ebde45b519b7816d7129cb73c5f5e6ab0df9960fa2c9f339fe9d9068"}, + {file = "langchain_google_genai-1.0.10.tar.gz", hash = "sha256:d4465aaf50825c78663618259ceca60a323d33b1a09a791631ddc7bd4806f4ce"}, ] [package.dependencies] -google-generativeai = ">=0.5.2,<0.6.0" -langchain-core = ">=0.1.45,<0.3" +google-generativeai = ">=0.7.0,<0.8.0" +langchain-core = ">=0.2.33,<0.3" [package.extras] images = ["pillow (>=10.1.0,<11.0.0)"] @@ -3134,13 +3135,13 @@ tests = ["coverage[toml] (==5.0.4)", "pytest (>=6.0.0,<7.0.0)"] [[package]] name = "pyparsing" -version = "3.1.2" +version = "3.2.0" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false -python-versions = ">=3.6.8" +python-versions = ">=3.9" files = [ - {file = "pyparsing-3.1.2-py3-none-any.whl", hash = "sha256:f9db75911801ed778fe61bb643079ff86601aca99fcae6345aa67292038fb742"}, - {file = 
"pyparsing-3.1.2.tar.gz", hash = "sha256:a1bac0ce561155ecc3ed78ca94d3c9378656ad4c94c1270de543f621420f94ad"}, + {file = "pyparsing-3.2.0-py3-none-any.whl", hash = "sha256:93d9577b88da0bbea8cc8334ee8b918ed014968fd2ec383e868fb8afb1ccef84"}, + {file = "pyparsing-3.2.0.tar.gz", hash = "sha256:cbf74e27246d595d9a74b186b810f6fbb86726dbf3b9532efb343f6d7294fe9c"}, ] [package.extras] @@ -3148,20 +3149,20 @@ diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pytest" -version = "8.2.2" +version = "8.3.3" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" files = [ - {file = "pytest-8.2.2-py3-none-any.whl", hash = "sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343"}, - {file = "pytest-8.2.2.tar.gz", hash = "sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977"}, + {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, + {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, ] [package.dependencies] colorama = {version = "*", markers = "sys_platform == \"win32\""} iniconfig = "*" packaging = "*" -pluggy = ">=1.5,<2.0" +pluggy = ">=1.5,<2" [package.extras] dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"] @@ -4412,4 +4413,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = "^3.11" -content-hash = "7fc0b8b9178cce25f4d340bd469bab0a89a15bb4311a4de44850925e5de0e7e4" +content-hash = "12b02c526f80608f14fd5ca9de26986302203a66fc23a07f38c40b5c454a18ce" diff --git a/pyproject.toml b/pyproject.toml index 2ff03bc0..b035dbe5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,6 @@ sqlalchemy-utils = "^0.41.2" retrying = "^1.3.4" langchain-postgres = "0.0.9" psycopg2-binary = "^2.9.9" -langchain-google-genai = "^1.0.4" psycopg-binary = "^3.1.19" langchain-core = ">=0.2.9,<0.3" langgraph="0.1.1" @@ -50,6 +49,8 @@ moralis = "^0.1.49" pytest-asyncio = "^0.23.8" feedparser = "^6.0.11" jinja2 = "^3.1.4" +langchain-google-genai = "<2.0.4" +pytest = "^8.3.3" [tool.poetry.group.dev.dependencies] ruff = "^0.4.1" diff --git a/tests/compatible-models.mdx b/tests/compatible-models.mdx index 8a9789c2..206d0661 100644 --- a/tests/compatible-models.mdx +++ b/tests/compatible-models.mdx @@ -30,13 +30,17 @@ Learn more: -| Name | Score (out of 100) | -|------|-------------------| - +| Name | Score (out of 100) | Function Call Support | +|------|-------------------|---------------------| +| mistral-nemo | 88 | ✅ | +| llama3.2 | 80 | ✅ | +| darkmoon/olmo:7B-instruct-q6-k | 0 | ❌ | ### Proprietary Models -| Name | Score (out of 100) | -|------|-------------------| -| gpt-4o-mini | 100 | -| gpt-4o | 100 | \ No newline at end of file +| Name | Score (out of 100) | Function Call Support | +|------|-------------------|---------------------| +| gpt-4o-mini | 100 | ✅ | +| gemini-1.5-flash | 100 | ✅ | +| gpt-4o | 96 | ✅ | +| gemini-1.5-pro | 96 | ✅ | \ No newline at end of file diff --git a/tests/run_test.py b/tests/run_test.py index 3d6db347..ae4788d8 100644 --- a/tests/run_test.py +++ b/tests/run_test.py @@ -4,6 +4,20 @@ import pytest from jinja2 import Environment, FileSystemLoader +# Global model configurations +PROPRIETARY_MODELS = [ + {"name": "gpt-4o-mini", "function_call_support": True}, + {"name": "gpt-4o", "function_call_support": 
+    {"name": "gemini-1.5-flash", "function_call_support": True},
+    {"name": "gemini-1.5-pro", "function_call_support": True},
+]
+
+OPENSOURCE_MODELS = [
+    {"name": "llama3.2", "function_call_support": True},
+    {"name": "mistral-nemo", "function_call_support": True},
+    # {"name": "darkmoon/olmo:7B-instruct-q6-k", "function_call_support": False}
+]
+
 
 class TestStats:
     def __init__(self):
@@ -38,16 +52,29 @@ def pytest_terminal_summary(self, terminalreporter, exitstatus, config):
 
 
 def generate_model_report(proprietary_results, opensource_results):
-    # Convert results format
-    proprietary_models = [
-        {'name': model_name, 'score': score}
-        for model_name, score in proprietary_results.items()
-    ]
-
-    open_source_models = [
-        {'name': model_name, 'score': score}
-        for model_name, score in opensource_results.items()
-    ]
+    def bool_to_emoji(value):
+        return "✅" if value else "❌"
+
+    # Convert results format and sort by score
+    proprietary_models = []
+    for model in PROPRIETARY_MODELS:
+        if model['name'] in proprietary_results:
+            proprietary_models.append({
+                'name': model['name'],
+                'score': proprietary_results[model['name']],
+                'function_call_support': bool_to_emoji(model['function_call_support'])
+            })
+    proprietary_models.sort(key=lambda x: x['score'], reverse=True)
+
+    open_source_models = []
+    for model in OPENSOURCE_MODELS:
+        if model['name'] in opensource_results:
+            open_source_models.append({
+                'name': model['name'],
+                'score': opensource_results[model['name']],
+                'function_call_support': bool_to_emoji(model['function_call_support'])
+            })
+    open_source_models.sort(key=lambda x: x['score'], reverse=True)
 
     # Set up template environment
     env = Environment(loader=FileSystemLoader('templates'))
@@ -73,32 +100,21 @@ def run_model_tests(model_name):
     return stats.calculate_model_score()
 
 
-def run_all_tests(proprietary_models, opensource_models):
+def run_all_tests():
     proprietary_results = {}
     opensource_results = {}
 
     # Test proprietary models
-    for model in proprietary_models:
-        proprietary_results[model] = run_model_tests(model)
+    for model in PROPRIETARY_MODELS:
+        proprietary_results[model['name']] = run_model_tests(model['name'])
 
     # Test open source models
-    for model in opensource_models:
-        opensource_results[model] = run_model_tests(model)
+    for model in OPENSOURCE_MODELS:
+        opensource_results[model['name']] = run_model_tests(model['name'])
 
     # Generate report
     generate_model_report(proprietary_results, opensource_results)
 
 
 if __name__ == "__main__":
-    # Proprietary model list
-    proprietary_models = [
-        "gpt-4o-mini",
-        "gpt-4o",
-    ]
-
-    # Open source model list
-    opensource_models = [
-        # "llama3.2",
-    ]
-
-    run_all_tests(proprietary_models, opensource_models)
+    run_all_tests()
diff --git a/tests/templates/compatible-models.mdx.j2 b/tests/templates/compatible-models.mdx.j2
index 76abc58a..5bc8f70f 100644
--- a/tests/templates/compatible-models.mdx.j2
+++ b/tests/templates/compatible-models.mdx.j2
@@ -30,16 +30,16 @@ Learn more:
 
-| Name | Score (out of 100) |
-|------|-------------------|
-{% for model in open_source_models -%}
-| {{ model.name }} | {{ model.score }} |
+| Name | Score (out of 100) | Function Call Support |
+|------|-------------------|---------------------|
+{%- for model in open_source_models %}
+| {{ model.name }} | {{ model.score }} | {{ model.function_call_support }} |
 {%- endfor %}
 
 ### Proprietary Models
 
-| Name | Score (out of 100) |
-|------|-------------------|
+| Name | Score (out of 100) | Function Call Support |
+|------|-------------------|---------------------|
 {%- for model in proprietary_models %}
-| {{ model.name }} | {{ model.score }} |
+| {{ model.name }} | {{ model.score }} | {{ model.function_call_support }} |
 {%- endfor %}
\ No newline at end of file