Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cdx and chennai tags pass #9

Merged
merged 3 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/containers.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,17 @@ jobs:
run: |
python3.11 -m pip install --upgrade pip
python3.11 -m pip install poetry
python3.11 -m poetry export -f requirements.txt --with=science --output target/chen-science-requirements.txt
python3.11 -m poetry export -f requirements.txt --with=science --without-hashes --output target/chen-science-requirements.txt
python3.11 -m poetry export -f requirements.txt --with=database --without-hashes --output target/chen-database-requirements.txt
- name: Upload chen to ghcr
run: |
cd target
echo $GITHUB_TOKEN | oras login ghcr.io -u $GITHUB_USERNAME --password-stdin
oras push ghcr.io/$IMAGE_NAME:v1 \
--annotation-file ../ci/annotations.json \
./chen.zip:application/vnd.appthreat.chen.layer.v1+tar \
./chen-science-requirements.txt:application/vnd.appthreat.chen.layer.v1+tar
./chen-science-requirements.txt:application/vnd.appthreat.chen.layer.v1+tar \
./chen-database-requirements.txt:application/vnd.appthreat.chen.layer.v1+tar
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_USERNAME: ${{ github.actor }}
Expand Down
4 changes: 3 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,6 @@ flake.lock
chen.zip
.metals/
*.pyc
**/__pycache__/
**/__pycache__/
.vscode/
project/metals.sbt
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ Code Hierarchy Exploration Net (chen) is an advanced exploration toolkit for you
- Node.js > 16 (To run [atom](https://github.com/AppThreat/atom))
- Minimum 16GB RAM

### Additional requirements

- Rust (For rocksdb-py compilation)

## Installation

```shell
Expand All @@ -27,6 +31,12 @@ To download the chen distribution including the science pack.
chen --download
```

To generate custom graphs and models with atom for data science, download the science pack, which installs support for the PyTorch ecosystem.

```shell
chen --download --with-science
```

Once the download finishes, the command will display the download location along with the environment variables that need to be set to invoke `chennai` console. Example output below:

```shell
Expand Down
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name := "chen"
ThisBuild / organization := "io.appthreat"
ThisBuild / version := "0.0.9"
ThisBuild / version := "0.0.10"
ThisBuild / scalaVersion := "3.3.1"

val cpgVersion = "1.4.22"
Expand Down
19 changes: 14 additions & 5 deletions chenpy/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ def build_args():
help="Download the latest chen distribution in platform specific "
"user_data_dir",
)
parser.add_argument(
"--with-science",
action="store_true",
default=False,
dest="science_pack",
help="Download the science pack",
)
parser.add_argument(
"--server",
action="store_true",
Expand Down Expand Up @@ -104,7 +111,7 @@ def fix_envs():
)


def download_chen_distribution(overwrite=False):
def download_chen_distribution(overwrite=False, science_pack=False):
if os.path.exists(os.path.join(config.chen_home, "platform")):
if not overwrite:
fix_envs()
Expand Down Expand Up @@ -150,16 +157,18 @@ def download_chen_distribution(overwrite=False):
pass
# Install the science pack
if req_files:
install_science_modules()
install_py_modules("database")
if science_pack:
install_py_modules("science")
fix_envs()


def install_science_modules():
def install_py_modules(pack="database"):
"""
Install the required science modules
"""
LOG.debug("About to install the science pack using cpu-only configuration")
req_file = os.path.join(config.chen_home, "chen-science-requirements.txt")
req_file = os.path.join(config.chen_home, f"chen-{pack}-requirements.txt")
if os.path.exists(req_file):
subprocess.check_call(
[sys.executable, "-m", "pip", "install", "-r", req_file],
Expand All @@ -174,7 +183,7 @@ def main():
and generates reports based on the results.
"""
args = build_args()
download_chen_distribution(args.download)
download_chen_distribution(args.download, args.science_pack)


if __name__ == "__main__":
Expand Down
16 changes: 16 additions & 0 deletions chenpy/db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import rocksdbpy


def db_options():
    """Build the rocksdbpy options object used when opening a database.

    Returns:
        A ``rocksdbpy.Option`` configured to create the database when it is
        missing, with max open files set to 10, fsync enabled,
        bytes-per-sync set to ``1024 * 1024`` and bloom locality set to 16.
    """
    options = rocksdbpy.Option()
    options.create_if_missing(True)
    options.set_max_open_files(10)
    options.set_use_fsync(True)
    options.set_bytes_per_sync(1024 * 1024)
    # NOTE(review): upstream RocksDB documents this argument as a block cache
    # size in MB, not bytes -- confirm that 1024 * 1024 is the intended value.
    options.optimize_for_point_lookup(1024 * 1024)
    options.set_bloom_locality(16)
    return options


def get(path):
    """Open the database at ``path`` using the options from ``db_options``.

    Args:
        path: Location handed to ``rocksdbpy.open``.

    Returns:
        Whatever ``rocksdbpy.open`` returns for that path.
    """
    options = db_options()
    return rocksdbpy.open(path, options)
38 changes: 27 additions & 11 deletions chenpy/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,25 @@

from chenpy.utils import calculate_hash

DATABASE_PACK_AVAILABLE = True
SCIENCE_PACK_AVAILABLE = True
try:
import networkx as nx
from networkx.readwrite import json_graph, read_graphml
except ImportError:
DATABASE_PACK_AVAILABLE = False

try:
import pydotplus
import torch
from networkx.readwrite import json_graph, read_graphml
from torch import Tensor
from torch_geometric.data import Data
from torchtext.data.functional import (
generate_sp_model,
load_sp_model,
sentencepiece_numericalizer,
sentencepiece_tokenizer,
)
except ImportError:
SCIENCE_PACK_AVAILABLE = False

Expand Down Expand Up @@ -226,26 +237,30 @@ def node_match_fn(n1, n2):


def gep(first_graph, second_graph, upper_bound=500):
"""Function to compute the difference based on optimal edit path algorithm"""
return nx.optimal_edit_paths(
distance = nx.optimal_edit_paths(
first_graph,
second_graph,
node_match=node_match_fn,
edge_match=node_match_fn,
upper_bound=upper_bound,
)
if distance is None:
distance = -1
return distance


def ged(first_graph, second_graph, timeout=5, upper_bound=500):
"""Function to compute the difference based on graph edit distance algorithm"""
return nx.graph_edit_distance(
distance = nx.graph_edit_distance(
first_graph,
second_graph,
node_match=node_match_fn,
edge_match=node_match_fn,
timeout=timeout,
upper_bound=upper_bound,
)
if distance is None:
distance = -1
return distance


def write_dot(G, path):
Expand Down Expand Up @@ -296,22 +311,23 @@ def summarize(G, as_dict=False, as_dot=False):
return summary_graph


def is_similar(M1, M2, upper_bound=500, timeout=5):
"""Function to check if two graphs are similar. To simplify the problem, first the raw graph difference is computed to check if the graphs are the same.
If not graph edit distance is computed with a fixed timeout to help answer the question
"""
def is_similar(M1, M2, edit_distance=10, upper_bound=500, timeout=5):
if not diff_graph(M1, M2, as_dict=True):
return True
distance = ged(M1, M2, upper_bound=upper_bound, timeout=timeout)
if distance is None:
if distance == -1:
return False
return True
return int(distance) < edit_distance


def convert_graphml(
gml_file, force_multigraph=False, as_graph=True, as_adjacency_data=False
):
"""Function to convert graphml to networkx"""
if not DATABASE_PACK_AVAILABLE:
return RuntimeError(
"Graph database dependencies missing. Please refer to the documentation to install the database pack or use the official chen container image."
)
try:
G = read_graphml(gml_file, force_multigraph=force_multigraph)
if as_graph:
Expand Down
28 changes: 27 additions & 1 deletion chenpy/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,43 @@
import os

from rich.console import Console
from rich.highlighter import RegexHighlighter
from rich.logging import RichHandler
from rich.theme import Theme

custom_theme = Theme({"info": "#5A7C90", "warning": "#FF753D", "danger": "bold red"})

class CustomHighlighter(RegexHighlighter):
    """Regex-based highlighter whose styles live under the ``atom.`` prefix."""

    # Prefix for the style names; the named groups below (method, path,
    # params, opers) line up with the "atom.<group>" keys in custom_theme.
    base_style = "atom."
    highlights = [
        # Dot-separated names made of word characters and hyphens, optionally
        # followed by one character that is not any of < > : ( ) ,
        r"(?P<method>([\w-]+\.)+[\w-]+[^<>:(),]?)",
        # Word characters, a slash, then anything up to a dot followed by
        # word characters or colons.
        r"(?P<path>(\w+\/.*\.[\w:]+))",
        # A parenthesised dotted name anchored at the end of the text.
        r"(?P<params>[(]([\w,-]+\.)+?[\w-]+[)]$)",
        # A fixed set of keywords.
        r"(?P<opers>(unresolvedNamespace|unresolvedSignature|init|operators|operator|clinit))",
    ]


# Colours for the "atom.*" style names (matching CustomHighlighter's
# base_style and group names) plus the info, warning and danger styles.
custom_theme = Theme(
    {
        "atom.path": "#7c8082",
        "atom.params": "#5a7c90",
        "atom.opers": "#7c8082",
        "atom.method": "#FF753D",
        "info": "#5A7C90",
        "warning": "#FF753D",
        "danger": "bold red",
    }
)


# Module-level console built with the theme and highlighter defined above.
console = Console(
    log_time=False,
    log_path=False,
    theme=custom_theme,
    # Width is read from the COLUMNS environment variable, defaulting to 270.
    width=int(os.getenv("COLUMNS", "270")),
    color_system="256",
    force_terminal=True,
    highlight=True,
    highlighter=CustomHighlighter(),
    record=True,
)

Expand Down
Empty file added chenpy/source/__init__.py
Empty file.
122 changes: 122 additions & 0 deletions chenpy/source/ghsa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import os

import httpx

# GraphQL endpoint queried for advisories; the GITHUB_GRAPHQL_URL environment
# variable overrides the public api.github.com default.
ghsa_api_url = os.environ.get(
    "GITHUB_GRAPHQL_URL", "https://api.github.com/graphql"
)

# Token read from GITHUB_TOKEN; it is None when the variable is unset.
api_token = os.environ.get("GITHUB_TOKEN")

# Authorization header sent with every request.
headers = {"Authorization": f"token {api_token}"}

# Ecosystem names (lower-cased by parse_response) whose purl type differs
# from the ecosystem name itself; anything not listed is used unchanged.
ecosystem_type_dict = {
    "go": "golang",
    "rust": "cargo",
    "pip": "pypi",
    "rubygems": "gem",
}


def get_query(cve_or_ghsa=None, only_malware=False, extra_clause=None):
    """Build the GraphQL request payload for the ``securityAdvisories`` query.

    Args:
        cve_or_ghsa: Optional advisory identifier. A value starting with
            ``GHSA`` is sent with identifier type ``GHSA``; any other value
            is sent with type ``CVE``.
        only_malware: When true, add the ``classifications:MALWARE`` filter.
        extra_clause: Optional argument text appended verbatim to the
            ``securityAdvisories`` argument list.

    Returns:
        A dict with a single ``query`` key holding the GraphQL document.
    """
    # Arguments are collected in order and joined with ", " at the end.
    clauses = ["first: 100"]
    if cve_or_ghsa:
        kind = "GHSA" if cve_or_ghsa.startswith("GHSA") else "CVE"
        clauses.append(f'identifier: {{type: {kind}, value: "{cve_or_ghsa}"}}')
    if only_malware:
        clauses.append("classifications:MALWARE")
    if extra_clause:
        clauses.append(f"{extra_clause}")
    query_template = """
{
securityAdvisories(
%(extra_args)s
) {
nodes {
id
ghsaId
summary
description
identifiers {
type
value
}
origin
publishedAt
updatedAt
references {
url
}
severity
withdrawnAt
vulnerabilities(first: 10) {
nodes {
firstPatchedVersion {
identifier
}
package {
ecosystem
name
}
severity
updatedAt
vulnerableVersionRange
}
}
}
}
}
"""
    return {"query": query_template % {"extra_args": ", ".join(clauses)}}


def parse_response(json_data):
    """Convert a ``securityAdvisories`` GraphQL response into purl records.

    For every vulnerability node the version is taken from
    ``firstPatchedVersion.identifier`` when a patched version is present;
    otherwise the last space-separated token of ``vulnerableVersionRange``
    is used. Nodes lacking an ecosystem, a package name or a version are
    skipped.

    Args:
        json_data: Decoded JSON response body. ``None``, a missing key or an
            explicit ``null`` at any level is treated as "no data".

    Returns:
        A list of dicts, each with the keys ``ghsaId`` and ``purl``.
    """
    purl_list = []
    # A .get() default only applies when the key is absent. GraphQL error
    # responses carry an explicit `"data": null`, so each level is
    # normalised with `or` instead of relying on the default.
    data = (json_data or {}).get("data") or {}
    advisories = data.get("securityAdvisories") or {}
    for node in advisories.get("nodes") or []:
        if not node:
            continue
        ghsa_id = node.get("ghsaId")
        vulnerabilities = node.get("vulnerabilities") or {}
        for vn in vulnerabilities.get("nodes") or []:
            if not vn:
                continue
            patched = vn.get("firstPatchedVersion")
            version_range = vn.get("vulnerableVersionRange")
            version = ""
            if patched:
                version = patched.get("identifier", "")
            elif version_range:
                # e.g. "< 1.2.3" -> "1.2.3"
                version = version_range.split(" ")[-1]
            pkg = vn.get("package") or {}
            ptype = (pkg.get("ecosystem") or "").lower()
            pname = (pkg.get("name") or "").lower().replace(":", "/")
            if not (ptype and pname and version):
                continue
            purl = f"pkg:{ecosystem_type_dict.get(ptype, ptype)}/{pname}@{version}"
            purl_list.append(
                {
                    "ghsaId": ghsa_id,
                    "purl": purl,
                }
            )
    return purl_list


def get_download_urls(cve_or_ghsa=None, only_malware=False):
    """Query the advisory API and return purl records for the affected packages.

    Despite the name, the result is the list built by ``parse_response``
    (dicts with ``ghsaId`` and ``purl`` keys), not download URLs.

    Args:
        cve_or_ghsa: Optional CVE or GHSA identifier to look up.
        only_malware: When true, restrict the query to malware advisories.

    Returns:
        The list returned by ``parse_response`` for the decoded response.

    Raises:
        ValueError: If no API token is configured (``api_token`` is empty).
    """
    if not api_token:
        raise ValueError("GITHUB_TOKEN is required with read:packages scope")
    # Using the client as a context manager closes its connection pool even
    # when the request or the JSON decoding raises.
    with httpx.Client(http2=True, follow_redirects=True, timeout=180) as client:
        r = client.post(
            url=ghsa_api_url,
            json=get_query(cve_or_ghsa=cve_or_ghsa, only_malware=only_malware),
            headers=headers,
        )
        json_data = r.json()
    return parse_response(json_data)
Loading