add crew ai job application, and start llama index
jbcodeforce committed Jun 3, 2024
1 parent aecd2dc commit 4c61832
Showing 10 changed files with 241 additions and 91 deletions.
4 changes: 3 additions & 1 deletion .gitignore
@@ -2,6 +2,7 @@
data/17flowers/
site/
**/.venv/
venv/
**/.pytorch/
.conda/
.vscode/
@@ -19,4 +20,5 @@ deep-neural-net/d2l-en/
pytorch/computer-vision/data/
**/models/
**/.env
**/.taipy/
**/.taipy/
techno/crew-ai/db/
5 changes: 5 additions & 0 deletions LlamaIndex/requirements.txt
@@ -0,0 +1,5 @@
llama-index
python-dotenv
llama-index-llms-openai
llama-index-embeddings-openai
llama-index-llms-mistralai
67 changes: 67 additions & 0 deletions LlamaIndex/router_engine.py
@@ -0,0 +1,67 @@
"""
Load a PDF document, split it with a sentence splitter, build a summary index and a vector index,
and route queries between them with a RouterQueryEngine.
"""
import sys
from dotenv import load_dotenv
# import nest_asyncio
from llama_index.core import SimpleDirectoryReader, Settings, SummaryIndex, VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.llms.openai import OpenAI
from llama_index.core.tools import QueryEngineTool
from llama_index.embeddings.openai import OpenAIEmbedding

load_dotenv("../.env")

def load_docs(fn: str):
    """Load the document, split it into nodes, and build the summary and vector indexes."""
    docs = SimpleDirectoryReader(input_files=[fn]).load_data()

    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(docs)
    Settings.llm = OpenAI(model="gpt-3.5-turbo")
    Settings.embed_model = OpenAIEmbedding(model="text-embedding-ada-002")
    print("--- Build two indexes: one for the summary, one for the vector store (for semantic queries based on similarity)")
    summary_index = SummaryIndex(nodes)
    vector_index = VectorStoreIndex(nodes)
    return summary_index, vector_index

def get_router_engine(summary_index, vector_index):
    # Build one query engine per index
    summary_query_engine = summary_index.as_query_engine(response_mode="tree_summarize", use_async=True)
    vector_query_engine = vector_index.as_query_engine()

    summary_tool = QueryEngineTool.from_defaults(
        query_engine=summary_query_engine,
        description=(
            "Useful for summarization questions related to MetaGPT"
        ),
    )

    vector_tool = QueryEngineTool.from_defaults(
        query_engine=vector_query_engine,
        description=(
            "Useful for retrieving specific context from the MetaGPT paper."
        ),
    )

    # The selector asks the LLM to pick the most relevant tool for each incoming query
    query_engine = RouterQueryEngine(
        selector=LLMSingleSelector.from_defaults(),
        query_engine_tools=[summary_tool, vector_tool],
        verbose=True
    )
    return query_engine

if __name__ == "__main__":
    filename = sys.argv[1]
    print(f"--> Build indexes from content of {filename}")
    summary_index, vector_index = load_docs(filename)
    engine = get_router_engine(summary_index, vector_index)
    response = engine.query("What is the summary of the document?")
    print(str(response))
    print(len(response.source_nodes))
    response = engine.query(
        "How do agents share information with other agents?"
    )
    print(str(response))
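To exercise the router end to end, a hypothetical invocation from the LlamaIndex folder would be (the PDF file name is illustrative, and an OPENAI_API_KEY entry is assumed in ../.env, matching the load_dotenv call above):

    pip install -r requirements.txt
    python router_engine.py metagpt.pdf

The LLMSingleSelector should route the first query ("What is the summary of the document?") to the summary tool and the second, more specific question to the vector tool, so a single run exercises both branches.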
11 changes: 11 additions & 0 deletions docs/coding/llama-index.md
@@ -0,0 +1,11 @@
# LlamaIndex library

[LlamaIndex](https://docs.llamaindex.ai/en/stable/) is a framework for building context-augmented LLM applications, such as Q&A, chatbots, document understanding and extraction, and agentic apps.

It includes data connectors, indexes, natural-language query engines, agents, and a set of tools for observability and evaluation.

LlamaParse is an interesting document-parsing solution, and [LlamaHub](https://llamahub.ai/) offers a catalog of community data loaders, tools, and integrations.

## Getting Started

* Routing queries example using RAG [router_engine.py](https://github.com/jbcodeforce/ML-studies/tree/master/LlamaIndex/router_engine.py).
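A minimal sketch of the same vector-index pattern, without the router (it assumes an `OPENAI_API_KEY` in the environment and a local `data/` folder with documents; both are illustrative and not part of this commit):

```python
from dotenv import load_dotenv
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex

load_dotenv()  # expects OPENAI_API_KEY (assumption, mirroring router_engine.py)

# Load every file in ./data, build an in-memory vector index, and ask one question.
docs = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(docs)
query_engine = index.as_query_engine()
print(query_engine.query("What is this document about?"))
```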
1 change: 1 addition & 0 deletions mkdocs.yml
@@ -38,6 +38,7 @@ nav:
- Feature Store: techno/feature_store.md/
- Kaggle: kaggle.md
- LangChain: coding/langchain.md
- LlamaIndex: coding/llama-index.md
- OpenSearch: techno/opensearch.md
- Pandas: coding/pandas.md
- Python studies: https://jbcodeforce.github.io/python-code/
52 changes: 40 additions & 12 deletions techno/crew-ai/a_resume.md
@@ -1,33 +1,37 @@


Jerome Boyer
Santa Clara, CA - USA
Former IBM - Distinguished Engineer
AWS Principal Solution Architect
Master Computer Science Nice University, France [Linkedin]()
Master Computer Science Nice University, France [Linkedin](https://www.linkedin.com/in/jeromeboyer/)


## Background summary

For the last ten years as AWS principal solution architect and an IBM distinguished engineer, I help customers adopt hybrid cloud, designing complex solutions around microservices, streaming and data management for AI/ML. I'm currently helping customer adopting Generative AI agents combined with traditional symbolic AI to get real actionable value from AI.
I have years of experience in business process automation and decision automation with rule engine systems. Book authors, conferences speaker, I’m still hands-on to develop MVPs, proof of technology.
For the last ten years as AWS principal solution architect and an IBM distinguished engineer, I help customers adopt hybrid cloud, designing complex solutions around microservices, real-time streaming and data management for AI/ML. I'm currently helping customers adopt Generative AI models combined with traditional symbolic AI to get real actionable value from AI.
During my career I have led consulting engagements in business process automation and decision automation with rule engine system implementations. As CTO for event-driven architecture at IBM, I have led event-driven and streaming solutions from strategic positioning at C-level to hands-on proofs of technology for enterprise architects and lead developers.
Book author and conference speaker, I'm still hands-on, developing MVPs and proofs of technology.

I also contributed to multiple patents and publications on business rule models, IBM BPM and decision management integrations.

I am looking to guide customers on application modernization and cloud migration projects.

## Skills

Amazon Cloud Architecture Professional Certified
Event-driven architecture and streaming technologies with Kafka, Flink, Kafka Streams.
Cloud and hybrid technology such as Serverless Lambda, API Gateway, Kubernetes, OpenShift, Java Microprofile and Quarkus
AI: Classification, clustering, Deep Learning with PyTorch, Generative AI with prompt engineering, RAG, LangChain, LangGraph, LlamaIndex anddifferent LLMs
Methodology: Agile dev, Lean Startup, Design Thinking, Event Storming and Domain Driven Design
* Amazon Cloud Architecture Professional Certified
* Event-driven architecture and streaming technologies with Kafka, Flink, Kafka Streams.
* Cloud and hybrid technology such as Serverless Lambda, API Gateway, Kubernetes, OpenShift, Java Microprofile and Quarkus
* AI: classification, clustering, deep learning with PyTorch;
Generative AI with OpenAI, Mixtral, Claude, and Gemini, covering prompt engineering practices, RAG processing, function calling, and more recently agentic workflows. I led proof-of-technology implementations using LangChain, LangGraph, and CrewAI (Python).
* Databricks and Iceberg lakehouse for data governance
* Methodology: Agile development, Lean Startup, Design Thinking, Event Storming and Domain Driven Design
* Programming languages: Python, Java, Node.js.
* Technical and solution selling
* Master's degree in computer science


## Professional experience

04/2024 - Present: Athena Decision Systems: Principal consultant for Neuro-Symbolic AI solution implementations.
09/2022- 03/2024: AWS Principal Solution architect - ISV market support Data & AI ISVs, for serverless, event-driven and streaming or Generative AI, multi-tenancy solutions.
09/2022 - present: AWS Principal Solution Architect, ISV market, supporting Data & AI ISVs on serverless, event-driven and streaming, Generative AI, and multi-tenancy solutions.

10/2016-09/2022: Distinguished Engineer, Event-driven architecture CTO, specialized in hybrid cloud and reactive microservices-based solutions. Engaged with major IBM strategic accounts. Yearly business impact around $150M.

@@ -36,3 +40,27 @@ Worldwide position, involved in complex solution delivery around IBM business pr

12/99-1/09 ILOG Inc – Professional Service – Technical Director
I led the architect groups worldwide, developing best practices and highly qualified architects to support complex project delivery. Directly involved in the most complex NA consulting engagements. Grew a $10M consulting business to $80M in 2 years.

## Professional certifications

* 2023 AWS Solution Architect Professional - Serverless Technical Field Team - Generative AI and Databricks advocate.
* 2016: IBM Distinguished Engineer - CTO
* 2009: IBM Senior Technical Staff Member: a technical position intended to recognize individuals with a clearly superior record of sustained technical achievement and strong potential for continuing contribution to IBM's growth
* 1999: PMI Project Management Professional

## Publications and presentations

* EDA virtual book: https://jbcodeforce.github.io/eda-studies/ with all the related repositories to support assets and reference implementations.
* Patent issued: #10885440 “Contextual evaluation of process model and extraction” – 4 others filed.
* Co-author: “Sustainable IT Architecture”, Wiley
* Co-author: “Agile Business Rule Development with JRules”, Springer
* Agile Business Rule Development: the first open-source methodology for developing business applications with a BRMS
* Speaker every year at IBM conferences like Impact, Think and Interconnect from 2010 to 2021
* Speaker at external conferences like Business Rules Forum, ILOG Dialog, and the Gartner BPM Symposium
* IBM President Award for a network switch simulation test framework 1996
* ILOG Consultant of the Year 2003; $1M quarterly quota achievement in 1Q03
* ILOG 3 stars (2004) and 2 stars employee (2006)
* IBM Outstanding Corporate Achievement award – 2011, 2013, 2016, 2018, 2019
* IBM Exceptional Restricted Share Distribution Award – 2014, 2016, 2018, 2019, 2020
* AWS Generative AI 2023 Hackathon winner

22 changes: 22 additions & 0 deletions techno/crew-ai/gcp-interview.md
@@ -0,0 +1,22 @@

Interview Questions:

1. Can you share more about your experience with AI applications, particularly in deep learning and generative AI?
2. How have you utilized statistical programming languages, specifically Python, in your previous roles? Can you give specific examples of projects or initiatives where you applied these skills?
3. Tell us about a time when you delivered a technical presentation that influenced stakeholders' decisions. How did you approach it?
4. You mentioned that you have experience owning outcomes and making significant decisions. Can you give some specific examples where your decision-making impacted the business positively?
5. Could you elaborate on your experience with ML frameworks like TensorFlow, PyTorch, and Spark ML? How have you used them in your projects?
6. Can you share your experience with distributed training and optimizing performance versus costs? What strategies did you employ to achieve this?
7. How have you incorporated CI/CD solutions in the context of MLOps and LLMOps in your previous roles?
8. You mentioned that you had experience in systems design and the ability to design and explain data pipelines and ML pipelines. Could you walk us through your process?

Talking Points:

1. Jerome's extensive experience in AI applications, notably in deep learning and generative AI.
2. His proficiency in using statistical programming languages like Python in various AI applications.
3. Jerome’s experience in delivering technical presentations and leading business value sessions.
4. His experience in owning outcomes and making significant decisions that have led to substantial business impacts.
5. Jerome's expertise with ML frameworks like TensorFlow, PyTorch, JAX, and Spark ML.
6. His experience with distributed training and optimizing performance versus costs.
7. Jerome’s familiarity with CI/CD solutions in the context of MLOps and LLMOps.
8. His experience in systems design, with the ability to design and explain data pipelines and ML pipelines.
34 changes: 16 additions & 18 deletions techno/crew-ai/interview_materials.md
@@ -1,21 +1,19 @@
Interview Questions:

1. Can you share more about your experience with AI applications, particularly in deep learning and generative AI?
2. How have you utilized statistical programming languages, specifically Python, in your previous roles? Can you give specific examples of projects or initiatives where you applied these skills?
3. Tell us about a time when you delivered a technical presentation that influenced stakeholders' decisions. How did you approach it?
4. You mentioned that you have experience owning outcomes and making significant decisions. Can you give some specific examples where your decision-making impacted the business positively?
5. Could you elaborate on your experience with ML frameworks like TensorFlow, PyTorch, and Spark ML? How have you used them in your projects?
6. Can you share your experience with distributed training and optimizing performance versus costs? What strategies did you employ to achieve this?
7. How have you incorporated CI/CD solutions in the context of MLOps and LLMOps in your previous roles?
8. You mentioned that you had experience in systems design and the ability to design and explain data pipelines and ML pipelines. Could you walk us through your process?
1. Can you share some examples of real-world business problems you have solved using data-driven optimization and machine learning modeling?
2. What kind of ML models have you built from scratch for new applications? Can you explain the process you followed and the challenges you faced?
3. As an expert in NLP, how have you used this skill in your previous roles? Can you provide specific examples?
4. Can you explain your experience with SQL and handling large-scale data? Can you give an example of a project where you had to manage a large volume of data?
5. Your resume mentions that you are comfortable with Python, Java, and Node.js. Can you share how you have used these languages in your work?
6. Can you talk about your experience in the healthcare industry? If you haven't worked in this industry before, how do you plan to apply your skills and knowledge to it?
7. You have a Master's Degree in Computer Science from Nice University, France. How has your educational background helped you in your career?
8. How have you measured and optimized the direct business impact of your work in your previous roles?

Talking Points:

1. Jerome's extensive experience in AI applications, notably in deep learning and generative AI.
2. His proficiency in using statistical programming languages like Python in various AI applications.
3. Jerome’s experience in delivering technical presentations and leading business value sessions.
4. His experience in owning outcomes and making significant decisions that have led to substantial business impacts.
5. Jerome's expertise with ML frameworks like TensorFlow, PyTorch, JAX, and Spark ML.
6. His experience with distributed training and optimizing performance versus costs.
7. Jerome’s familiarity with CI/CD solutions in the context of MLOps and LLMOps.
8. His experience in systems design, with the ability to design and explain data pipelines and ML pipelines.
1. Discuss the candidate's experience as a Principal Consultant at Athena Decision Systems and AWS Principal Solution Architect at Amazon.
2. Discuss the candidate's proficiency in Python, Java, and Node.js and how it applies to the position.
3. Discuss the candidate's experience with Generative AI and how it could be applied in the role of NLP Scientist/Engineer at Machinify.
4. Discuss the candidate's desire to learn about the healthcare industry and how this interest could be beneficial to the role.
5. Discuss the candidate's experience in handling large-scale data using SQL, Databricks, and Iceberg lakehouse for data governance.
6. Discuss the candidate's experience building ML models from scratch for new applications.
7. Discuss the candidate's certifications and how they demonstrate his commitment to continued learning and expertise in his field.
8. Discuss the awards the candidate has received and what they mean for his capabilities in the role.
61 changes: 47 additions & 14 deletions techno/crew-ai/job_application.py
@@ -1,12 +1,16 @@
from crewai import Crew, Task, Agent
from crewai_tools import FileReadTool, ScrapeWebsiteTool, MDXSearchTool, SerperDevTool
from dotenv import load_dotenv
import os
import os, getopt, sys

load_dotenv("../../.env")

SERPER_API_KEY= os.getenv("SERPER_API_KEY")

"""
A program using multiple agents to analyze a job posting, build a personal profile, and tune
a source resume to tailor it to the job.
Uses web scraping and search tools.
"""
search_tool = SerperDevTool()
scrape_tool = ScrapeWebsiteTool()
read_resume = FileReadTool(file_path='./a_resume.md')
@@ -159,19 +163,48 @@
verbose=True
)

job_application_inputs = {
'job_posting_url': 'https://www.google.com/about/careers/applications/jobs/results/134490933082104518-field-solution-architect-iii-generative-ai-google-cloud',
'github_url': 'https://github.com/jbcodeforce',
'personal_writeup': """Jerome is an accomplished Software
Engineering Leader with 25 years of experience, former ibm distinguished engineer,
and principal solution architect at AWS, he is hands-on consultant around AI, generative AI and cloud deployments.
With years of experience in business process automation and decision automation with rule engine systems. Book authors, conferences speaker, still hands-on to develop MVPs, and proof of technology.

Contributed to multiple patents and publications on business rule models, IBM BPM and decision management integratio

"""
}

result = job_application_crew.kickoff(inputs=job_application_inputs)
print(result)

def usage():
    print("--- Job application resume tailoring ---")
    print("python3 job_application.py -u JOB_POSTING_URL -g GITHUB_URL -p PERSONAL_WRITE_UP")

def parse_args():
    GITHUB = 'https://github.com/jbcodeforce'
    PERSONAL = """Jerome is an accomplished Software
Engineering Leader with 25 years of experience, former IBM distinguished engineer,
and principal solution architect at AWS.
"""
    URL = ""
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hu:g:p:", ["url=", "github=", "pers_write_up="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        print(opt, arg)
        if opt == '-h':
            usage()
            sys.exit()
        elif opt in ("-u", "--url"):
            URL = arg
        elif opt in ("-g", "--github"):
            GITHUB = arg
        elif opt in ("-p", "--pers_write_up"):
            PERSONAL = arg
    return URL, GITHUB, PERSONAL

if __name__ == "__main__":
    URL, GITHUB, PERSONAL = parse_args()
    job_application_inputs = {
        'job_posting_url': URL,
        'github_url': GITHUB,
        'personal_writeup': PERSONAL
    }
    print(job_application_inputs)

    result = job_application_crew.kickoff(inputs=job_application_inputs)
    print(result)
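A hypothetical run from the techno/crew-ai folder (the job-posting URL is illustrative; a SERPER_API_KEY, plus whichever LLM key CrewAI is configured to use, is assumed to be in ../../.env, and ./a_resume.md must exist next to the script):

    python3 job_application.py -u https://example.com/jobs/genai-solution-architect -g https://github.com/jbcodeforce -p "25 years in software engineering, AI and cloud"

When -g or -p is omitted, parse_args falls back to the defaults defined above; -h prints the usage text and exits.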
