Skip to content

Commit

Permalink
Fix for cross join with multiple repeated data + tests
Browse files Browse the repository at this point in the history
  • Loading branch information
kasbohm committed May 27, 2024
1 parent 68c70a1 commit bf09e6b
Show file tree
Hide file tree
Showing 3 changed files with 247 additions and 8 deletions.
117 changes: 117 additions & 0 deletions test/test_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import pytest
import requests
import json
import pandas as pd
from test_fixtures import run_dbimport, start_webui, setup_database

def test_webui_landing(setup_database, run_dbimport, start_webui):
    """The projects endpoint should answer 200 with the two test projects."""
    resp = requests.get("http://127.0.0.1:3880/php/projects.php")
    assert resp.status_code == 200
    assert resp.json() == {
        "status_ok": True,
        "status_msg": "ok",
        "type": "dbmeta",
        "data": ["Proj1", "Proj2"],
    }

def test_api_list_vars(setup_database, run_dbimport, start_webui):
    """dbmeta should list a plausible number of tables, with 'core' first."""
    resp = requests.get("http://127.0.0.1:3880/php/dbmeta.php?prj=all")
    assert resp.status_code == 200

    tables = resp.json()["data"]["tables"]
    assert 1 < len(tables) < 100
    assert tables[0]["id"] == "core"

def test_api_doublesession_bug(setup_database, run_dbimport, start_webui):
    """Regression test for the cross-join bug with repeated session data.

    Queries core plus two MRI tables with set_op "all" and checks that
    subject 9900070 (which has repeated sessions) yields exactly 3 rows
    instead of a cross-join explosion.
    """
    url = "http://127.0.0.1:3880/php/query_json.php"
    payload = {
        "columns": [
            {"table_id": "core", "column_id": "subject_id"},
            {"table_id": "core", "column_id": "project_id"},
            {"table_id": "core", "column_id": "wave_code"},
            {"table_id": "core", "column_id": "subject_sex"},
            {"table_id": "core", "column_id": "visit_number"},
            {"table_id": "mri_aseg", "column_id": "volume_rh_hippocampus"},
            {"table_id": "mri_aparc", "column_id": "area_lh_bankssts"},
        ],
        "set_op": "all",
        "date": "2024-05-22T12:52:00.708Z",
        "version": "undefined (Wed May 22 12:42:59 2024)",
        "project": "Proj1",
    }

    # json= serializes the payload and sets Content-Type: application/json,
    # so no explicit headers dict is needed.
    response = requests.post(url, json=payload)
    assert response.status_code == 200

    r = response.json()
    column_def = r["data"]["column_def"]
    rows = r["data"]["rows"]
    # Named column_names (not "headers") to avoid shadowing HTTP headers.
    column_names = [col["id"] for col in column_def]
    df = pd.DataFrame(rows, columns=column_names)

    assert len(df[df["subject_id"] == 9900070]) == 3, "Expected 3 rows for subject 9900070"


def test_api_doublesession_bug_union(setup_database, run_dbimport, start_webui):
    """Same repeated-session query as the 'all' test, but with set_op "union".

    Subject 9900070 must come back with exactly 2 rows under union semantics.
    """
    url = "http://127.0.0.1:3880/php/query_json.php"
    payload = {
        "columns": [
            {"table_id": "core", "column_id": "subject_id"},
            {"table_id": "core", "column_id": "project_id"},
            {"table_id": "core", "column_id": "wave_code"},
            {"table_id": "core", "column_id": "subject_sex"},
            {"table_id": "core", "column_id": "visit_number"},
            {"table_id": "mri_aseg", "column_id": "volume_rh_hippocampus"},
            {"table_id": "mri_aparc", "column_id": "area_lh_bankssts"},
        ],
        "set_op": "union",
        "date": "2024-05-22T12:52:00.708Z",
        "version": "undefined (Wed May 22 12:42:59 2024)",
        "project": "Proj1",
    }

    # json= serializes the payload and sets Content-Type: application/json.
    response = requests.post(url, json=payload)
    assert response.status_code == 200

    r = response.json()
    column_def = r["data"]["column_def"]
    rows = r["data"]["rows"]
    # Named column_names (not "headers") to avoid shadowing HTTP headers.
    column_names = [col["id"] for col in column_def]
    df = pd.DataFrame(rows, columns=column_names)

    assert len(df[df["subject_id"] == 9900070]) == 2, "Expected 2 rows for subject 9900070"

def test_api_doublesession_bug_intersection(setup_database, run_dbimport, start_webui):
    """Same repeated-session query, with set_op "intersection".

    Subject 9900070 must come back with exactly 2 rows under intersection
    semantics.
    """
    url = "http://127.0.0.1:3880/php/query_json.php"
    payload = {
        "columns": [
            {"table_id": "core", "column_id": "subject_id"},
            {"table_id": "core", "column_id": "project_id"},
            {"table_id": "core", "column_id": "wave_code"},
            {"table_id": "core", "column_id": "subject_sex"},
            {"table_id": "core", "column_id": "visit_number"},
            {"table_id": "mri_aseg", "column_id": "volume_rh_hippocampus"},
            {"table_id": "mri_aparc", "column_id": "area_lh_bankssts"},
        ],
        "set_op": "intersection",
        "date": "2024-05-22T12:52:00.708Z",
        "version": "undefined (Wed May 22 12:42:59 2024)",
        "project": "Proj1",
    }

    # json= serializes the payload and sets Content-Type: application/json.
    response = requests.post(url, json=payload)
    assert response.status_code == 200

    r = response.json()
    column_def = r["data"]["column_def"]
    rows = r["data"]["rows"]
    # Named column_names (not "headers") to avoid shadowing HTTP headers.
    column_names = [col["id"] for col in column_def]
    df = pd.DataFrame(rows, columns=column_names)

    assert len(df[df["subject_id"] == 9900070]) == 2, "Expected 2 rows for subject 9900070"
105 changes: 105 additions & 0 deletions test/test_fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import pytest
import subprocess
import time
import os
import requests
from dotenv import load_dotenv

load_dotenv("config_default.txt")

def run_make_directive(directive):
    """Run `make <directive>`, echo its output, and raise on a non-zero exit."""
    result = subprocess.run(["make", directive], capture_output=True, text=True)
    print(f"Running make directive {directive}:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}")
    if result.returncode != 0:
        raise RuntimeError(f"Failed to run make directive {directive}: {result.stderr}")
    return result


def db_isready(host, port, dbname):
    """Return True when pg_isready reports the database accepts connections."""
    check = subprocess.run(
        ["3rdparty/postgresql/bin/pg_isready", "-h", host, "-p", port, "-d", dbname],
        capture_output=True,
        text=True,
    )
    print(f"Checking if database is ready:\nSTDOUT: {check.stdout}\nSTDERR: {check.stderr}")
    return check.returncode == 0


def start_database():
    """Start the test database via `make dbstart` and wait until it is ready.

    No-op when the database already accepts connections.  If readiness is
    not reached within ~30 seconds the server is stopped again and a
    RuntimeError is raised.
    """
    db_host = os.getenv("DBHOST")
    db_port = os.getenv("DBPORT")
    db_name = os.getenv("DBNAME")
    pgdata = os.getenv("PGDATA")
    print(f"Starting database with host={db_host}, port={db_port}, dbname={db_name}, pgdata={pgdata}")

    if db_isready(db_host, db_port, db_name):
        print("Database is already ready.")
        return

    # Reuse the shared make runner instead of duplicating the subprocess
    # invocation + error handling inline (consistency with the other helpers).
    run_make_directive("dbstart")

    # pg_isready can lag behind `make dbstart`; poll for up to 30 seconds.
    for _ in range(30):
        if db_isready(db_host, db_port, db_name):
            print("Database started successfully.")
            return
        time.sleep(1)

    stop_database()
    raise RuntimeError("Database did not start in time")


def import_data():
    """Import the test data set via `make run_dbimport`.

    Delegates to run_make_directive, which this function previously
    duplicated line-for-line; it prints the output, raises RuntimeError on
    failure, and returns the CompletedProcess.
    """
    return run_make_directive("run_dbimport")

def stop_database():
    """Stop the test database via `make dbstop`; raise on failure."""
    process = subprocess.run(["make", "dbstop"], capture_output=True, text=True)
    # Fixed typo in the log message ("Stooping" -> "Stopping").
    print(f"Stopping database:\nSTDOUT: {process.stdout}\nSTDERR: {process.stderr}")
    if process.returncode != 0:
        raise RuntimeError(f"Failed to stop the database: {process.stderr}")

def clean_database():
    """Erase the test database files via `make dberase`; raise on failure."""
    result = subprocess.run(["make", "dberase"], capture_output=True, text=True)
    print(f"Cleaning database:\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}")
    if result.returncode != 0:
        raise RuntimeError(f"Failed to clean the database: {result.stderr}")

@pytest.fixture(scope="module", autouse=True)
def setup_database():
    # Module-scoped fixture: erase any stale database state, start a fresh
    # server for the whole test module, and erase again at teardown.
    # NOTE(review): teardown runs clean_database() without stop_database()
    # first -- confirm `make dberase` tolerates (or stops) a running server.
    clean_database()
    start_database()
    yield
    clean_database()

@pytest.fixture(scope="module", autouse=True)
def run_dbimport(setup_database):
    """Import the test data once per module, after the database is up."""
    return run_make_directive("run_dbimport")

@pytest.fixture(scope="module", autouse=True)
def start_webui():
    """Start the web UI via `make run_webui` and yield the running process.

    Polls the projects endpoint until it answers 200 instead of relying on
    a single fixed sleep(10) -- faster when the UI comes up quickly and
    more tolerant of slow machines (up to ~30 s).  The process is
    terminated both on startup failure and at fixture teardown.
    """
    webui_process = subprocess.Popen(
        ["make", "run_webui"], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
    )

    last_error = None
    for _ in range(30):
        try:
            response = requests.get("http://127.0.0.1:3880/php/projects.php", timeout=2)
            if response.status_code == 200:
                break
            last_error = RuntimeError(f"unexpected status {response.status_code}")
        except Exception as e:  # connection refused while the UI is still starting
            last_error = e
        time.sleep(1)
    else:
        webui_process.terminate()
        webui_process.wait()
        # Chain the last observed error for a useful traceback.
        raise RuntimeError("Web UI did not start correctly") from last_error

    yield webui_process

    webui_process.terminate()
    webui_process.wait()
33 changes: 25 additions & 8 deletions webui/sql_build_query.php
Original file line number Diff line number Diff line change
Expand Up @@ -129,12 +129,13 @@ function _get_repeated_groups($dbmeta, $sel_tabs, $sel_cols){
continue;
}
// if t is not an array, it cannot have repeated group
if (!is_array($t)) {
continue;
}
// IKA: I think this always skips/continues, as class is never array?
// if (!is_array($t)) {
// continue;
// }
// has repeated_group?
echo get_class($t);
if (!array_key_exists("repeated_group", $t)) {
// echo get_class($t);
if (!property_exists($t, "repeated_group")) {
continue;
}
// init result?
Expand Down Expand Up @@ -163,15 +164,30 @@ function _get_repeated_groups($dbmeta, $sel_tabs, $sel_cols){
function _get_sql_where_repeated($dbmeta, $sel_tabs, $sel_cols) {
$rgroups = _get_repeated_groups($dbmeta, $sel_tabs, $sel_cols);
// generate where conditions; tab0.col=tabn.col
// IKA: Add WHERE condition AND (... OR ... IS NULL)
// This fixes cross join issue. Should probably be done in the JOINS, but when code
// is structured like it is, easier to do it here.
// Probably breaks something else ¯\_(ツ)_/¯
$sqls = array();
foreach ($rgroups as $rg) {
if (count(array_values($rg)) <= 1) {
continue;
}
array_push($sqls, " AND (");
for ($i=1; $i < count($rg); $i++) {
array_push($sqls,
"OR " . $rg[0 ]["table_id"] . "." . $rg[0 ]["col_id"] .
$rg[0 ]["table_id"] . "." . $rg[0 ]["col_id"] .
" = " . $rg[$i]["table_id"] . "." . $rg[$i]["col_id"]
);
}
}
};
// Does this break anything, maybe union/intersection selection option? TODO: test
for ($i=0; $i < count($rg); $i++) {
array_push($sqls,
" OR " . $rg[$i]["table_id"] . "." . $rg[$i]["col_id"] . " IS NULL "
);
};
array_push($sqls,")");
};
return join("\n", $sqls);
}

Expand Down Expand Up @@ -213,6 +229,7 @@ function _get_sql_select_repeated($dbmeta, $sel_tabs, $sel_cols) {
{$sql_where_repeated}
ORDER BY core.subject_id
";
// error_log($sql);
return $sql;
}

Expand Down

0 comments on commit bf09e6b

Please sign in to comment.